mirror of
https://github.com/salsa-rs/salsa.git
synced 2025-02-08 21:35:47 +00:00
2653 lines
190 KiB
HTML
2653 lines
190 KiB
HTML
<!DOCTYPE HTML>
|
|
<html lang="en" class="sidebar-visible no-js light">
|
|
<head>
|
|
<!-- Book generated using mdBook -->
|
|
<meta charset="UTF-8">
|
|
<title>Salsa</title>
|
|
<meta name="robots" content="noindex" />
|
|
|
|
|
|
<!-- Custom HTML head -->
|
|
|
|
|
|
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
|
<meta name="description" content="">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
<meta name="theme-color" content="#ffffff" />
|
|
|
|
<link rel="icon" href="favicon.svg">
|
|
<link rel="shortcut icon" href="favicon.png">
|
|
<link rel="stylesheet" href="css/variables.css">
|
|
<link rel="stylesheet" href="css/general.css">
|
|
<link rel="stylesheet" href="css/chrome.css">
|
|
<link rel="stylesheet" href="css/print.css" media="print">
|
|
|
|
<!-- Fonts -->
|
|
<link rel="stylesheet" href="FontAwesome/css/font-awesome.css">
|
|
<link rel="stylesheet" href="fonts/fonts.css">
|
|
|
|
<!-- Highlight.js Stylesheets -->
|
|
<link rel="stylesheet" href="highlight.css">
|
|
<link rel="stylesheet" href="tomorrow-night.css">
|
|
<link rel="stylesheet" href="ayu-highlight.css">
|
|
|
|
<!-- Custom theme stylesheets -->
|
|
<link rel="stylesheet" href="mermaid.css">
|
|
|
|
</head>
|
|
<body>
|
|
<!-- Provide site root to javascript -->
|
|
<script type="text/javascript">
|
|
var path_to_root = "";
|
|
var default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches ? "navy" : "light";
|
|
</script>
|
|
|
|
<!-- Work around some values being stored in localStorage wrapped in quotes -->
|
|
<script type="text/javascript">
|
|
try {
|
|
var theme = localStorage.getItem('mdbook-theme');
|
|
var sidebar = localStorage.getItem('mdbook-sidebar');
|
|
|
|
if (theme.startsWith('"') && theme.endsWith('"')) {
|
|
localStorage.setItem('mdbook-theme', theme.slice(1, theme.length - 1));
|
|
}
|
|
|
|
if (sidebar.startsWith('"') && sidebar.endsWith('"')) {
|
|
localStorage.setItem('mdbook-sidebar', sidebar.slice(1, sidebar.length - 1));
|
|
}
|
|
} catch (e) { }
|
|
</script>
|
|
|
|
<!-- Set the theme before any content is loaded, prevents flash -->
|
|
<script type="text/javascript">
|
|
var theme;
|
|
try { theme = localStorage.getItem('mdbook-theme'); } catch(e) { }
|
|
if (theme === null || theme === undefined) { theme = default_theme; }
|
|
var html = document.querySelector('html');
|
|
html.classList.remove('no-js')
|
|
html.classList.remove('light')
|
|
html.classList.add(theme);
|
|
html.classList.add('js');
|
|
</script>
|
|
|
|
<!-- Hide / unhide sidebar before it is displayed -->
|
|
<script type="text/javascript">
|
|
var html = document.querySelector('html');
|
|
var sidebar = 'hidden';
|
|
if (document.body.clientWidth >= 1080) {
|
|
try { sidebar = localStorage.getItem('mdbook-sidebar'); } catch(e) { }
|
|
sidebar = sidebar || 'visible';
|
|
}
|
|
html.classList.remove('sidebar-visible');
|
|
html.classList.add("sidebar-" + sidebar);
|
|
</script>
|
|
|
|
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
|
|
<div class="sidebar-scrollbox">
|
|
<ol class="chapter"><li class="chapter-item expanded "><a href="about_salsa.html"><strong aria-hidden="true">1.</strong> About salsa</a></li><li class="chapter-item expanded affix "><li class="part-title">How to use Salsa</li><li class="chapter-item expanded "><a href="overview.html"><strong aria-hidden="true">2.</strong> Overview</a></li><li class="chapter-item expanded "><a href="tutorial.html"><strong aria-hidden="true">3.</strong> Tutorial: calc language</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="tutorial/structure.html"><strong aria-hidden="true">3.1.</strong> Basic structure</a></li><li class="chapter-item expanded "><a href="tutorial/jar.html"><strong aria-hidden="true">3.2.</strong> Jars and databases</a></li><li class="chapter-item expanded "><a href="tutorial/db.html"><strong aria-hidden="true">3.3.</strong> Defining the database struct</a></li><li class="chapter-item expanded "><a href="tutorial/ir.html"><strong aria-hidden="true">3.4.</strong> Defining the IR: the various "salsa structs"</a></li><li class="chapter-item expanded "><a href="tutorial/parser.html"><strong aria-hidden="true">3.5.</strong> Defining the parser: memoized functions and inputs</a></li><li class="chapter-item expanded "><a href="tutorial/accumulators.html"><strong aria-hidden="true">3.6.</strong> Defining the parser: reporting errors</a></li><li class="chapter-item expanded "><a href="tutorial/debug.html"><strong aria-hidden="true">3.7.</strong> Defining the parser: debug impls and testing</a></li><li class="chapter-item expanded "><a href="tutorial/checker.html"><strong aria-hidden="true">3.8.</strong> Defining the checker</a></li><li class="chapter-item expanded "><a href="tutorial/interpreter.html"><strong aria-hidden="true">3.9.</strong> Defining the interpreter</a></li></ol></li><li class="chapter-item expanded "><a href="reference.html"><strong aria-hidden="true">4.</strong> Reference</a></li><li><ol class="section"><li class="chapter-item 
expanded "><a href="reference/durability.html"><strong aria-hidden="true">4.1.</strong> Durability</a></li><li class="chapter-item expanded "><a href="reference/algorithm.html"><strong aria-hidden="true">4.2.</strong> Algorithm</a></li></ol></li><li class="chapter-item expanded "><a href="common_patterns.html"><strong aria-hidden="true">5.</strong> Common patterns</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="common_patterns/selection.html"><strong aria-hidden="true">5.1.</strong> Selection</a></li><li class="chapter-item expanded "><a href="common_patterns/on_demand_inputs.html"><strong aria-hidden="true">5.2.</strong> On-demand (Lazy) inputs</a></li></ol></li><li class="chapter-item expanded "><a href="tuning.html"><strong aria-hidden="true">6.</strong> Tuning</a></li><li class="chapter-item expanded "><a href="cycles.html"><strong aria-hidden="true">7.</strong> Cycle handling</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="cycles/fallback.html"><strong aria-hidden="true">7.1.</strong> Recovering via fallback</a></li></ol></li><li class="chapter-item expanded "><li class="part-title">How Salsa works internally</li><li class="chapter-item expanded "><a href="how_salsa_works.html"><strong aria-hidden="true">8.</strong> How Salsa works</a></li><li class="chapter-item expanded "><a href="videos.html"><strong aria-hidden="true">9.</strong> Videos</a></li><li class="chapter-item expanded "><a href="plumbing.html"><strong aria-hidden="true">10.</strong> Plumbing</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="plumbing/jars_and_ingredients.html"><strong aria-hidden="true">10.1.</strong> Jars and ingredients</a></li><li class="chapter-item expanded "><a href="plumbing/database_and_runtime.html"><strong aria-hidden="true">10.2.</strong> Databases and runtime</a></li><li class="chapter-item expanded "><a href="plumbing/db_lifetime.html"><strong aria-hidden="true">10.3.</strong> The db 
lifetime on tracked/interned structs</a></li><li class="chapter-item expanded "><a href="plumbing/tracked_structs.html"><strong aria-hidden="true">10.4.</strong> Tracked structures</a></li><li class="chapter-item expanded "><a href="plumbing/query_ops.html"><strong aria-hidden="true">10.5.</strong> Query operations</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="plumbing/maybe_changed_after.html"><strong aria-hidden="true">10.5.1.</strong> maybe changed after</a></li><li class="chapter-item expanded "><a href="plumbing/fetch.html"><strong aria-hidden="true">10.5.2.</strong> Fetch</a></li><li class="chapter-item expanded "><a href="plumbing/derived_flowchart.html"><strong aria-hidden="true">10.5.3.</strong> Derived queries flowchart</a></li><li class="chapter-item expanded "><a href="plumbing/cycles.html"><strong aria-hidden="true">10.5.4.</strong> Cycle handling</a></li></ol></li><li class="chapter-item expanded "><a href="plumbing/terminology.html"><strong aria-hidden="true">10.6.</strong> Terminology</a></li><li><ol class="section"><li class="chapter-item expanded "><a href="plumbing/terminology/backdate.html"><strong aria-hidden="true">10.6.1.</strong> Backdate</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/changed_at.html"><strong aria-hidden="true">10.6.2.</strong> Changed at</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/dependency.html"><strong aria-hidden="true">10.6.3.</strong> Dependency</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/derived_query.html"><strong aria-hidden="true">10.6.4.</strong> Derived query</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/durability.html"><strong aria-hidden="true">10.6.5.</strong> Durability</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/input_query.html"><strong aria-hidden="true">10.6.6.</strong> Input query</a></li><li class="chapter-item expanded "><a 
href="plumbing/terminology/ingredient.html"><strong aria-hidden="true">10.6.7.</strong> Ingredient</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/LRU.html"><strong aria-hidden="true">10.6.8.</strong> LRU</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/memo.html"><strong aria-hidden="true">10.6.9.</strong> Memo</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/query.html"><strong aria-hidden="true">10.6.10.</strong> Query</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/query_function.html"><strong aria-hidden="true">10.6.11.</strong> Query function</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/revision.html"><strong aria-hidden="true">10.6.12.</strong> Revision</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/salsa_item.html"><strong aria-hidden="true">10.6.13.</strong> Salsa item</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/salsa_struct.html"><strong aria-hidden="true">10.6.14.</strong> Salsa struct</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/untracked.html"><strong aria-hidden="true">10.6.15.</strong> Untracked dependency</a></li><li class="chapter-item expanded "><a href="plumbing/terminology/verified.html"><strong aria-hidden="true">10.6.16.</strong> Verified</a></li></ol></li></ol></li><li class="chapter-item expanded "><li class="part-title">Appendices</li><li class="chapter-item expanded "><a href="meta.html"><strong aria-hidden="true">11.</strong> Meta: about the book itself</a></li></ol> </div>
|
|
<div id="sidebar-resize-handle" class="sidebar-resize-handle"></div>
|
|
</nav>
|
|
|
|
<div id="page-wrapper" class="page-wrapper">
|
|
|
|
<div class="page">
|
|
|
|
<div id="menu-bar-hover-placeholder"></div>
|
|
<div id="menu-bar" class="menu-bar sticky bordered">
|
|
<div class="left-buttons">
|
|
<button id="sidebar-toggle" class="icon-button" type="button" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
|
|
<i class="fa fa-bars"></i>
|
|
</button>
|
|
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
|
|
<i class="fa fa-paint-brush"></i>
|
|
</button>
|
|
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
|
|
<li role="none"><button role="menuitem" class="theme" id="light">Light (default)</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
|
|
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
|
|
</ul>
|
|
<button id="search-toggle" class="icon-button" type="button" title="Search. (Shortkey: s)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="S" aria-controls="searchbar">
|
|
<i class="fa fa-search"></i>
|
|
</button>
|
|
</div>
|
|
|
|
<h1 class="menu-title">Salsa</h1>
|
|
|
|
<div class="right-buttons">
|
|
<a href="print.html" title="Print this book" aria-label="Print this book">
|
|
<i id="print-button" class="fa fa-print"></i>
|
|
</a>
|
|
|
|
</div>
|
|
</div>
|
|
|
|
<div id="search-wrapper" class="hidden">
|
|
<form id="searchbar-outer" class="searchbar-outer">
|
|
<input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
|
|
</form>
|
|
<div id="searchresults-outer" class="searchresults-outer hidden">
|
|
<div id="searchresults-header" class="searchresults-header"></div>
|
|
<ul id="searchresults">
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
|
|
<script type="text/javascript">
|
|
document.getElementById('sidebar-toggle').setAttribute('aria-expanded', sidebar === 'visible');
|
|
document.getElementById('sidebar').setAttribute('aria-hidden', sidebar !== 'visible');
|
|
Array.from(document.querySelectorAll('#sidebar a')).forEach(function(link) {
|
|
link.setAttribute('tabIndex', sidebar === 'visible' ? 0 : -1);
|
|
});
|
|
</script>
|
|
|
|
<div id="content" class="content">
|
|
<main>
|
|
<h1 id="about-salsa"><a class="header" href="#about-salsa">About salsa</a></h1>
|
|
<p>Salsa is a Rust framework for writing incremental, on-demand programs
|
|
-- these are programs that want to adapt to changes in their inputs,
|
|
continuously producing a new output that is up-to-date. Salsa is based
|
|
on the incremental recompilation techniques that we built for
|
|
rustc, and many (but not all) of its users are building compilers or
|
|
other similar tooling.</p>
|
|
<p>If you'd like to learn more about Salsa, check out:</p>
|
|
<ul>
|
|
<li>The <a href="./overview.html">overview</a>, for a brief summary.</li>
|
|
<li>The <a href="./tutorial.html">tutorial</a>, for a detailed look.</li>
|
|
<li>You can also watch some of our <a href="./videos.html">videos</a>, though the content there is rather out of date.</li>
|
|
</ul>
|
|
<p>If you'd like to chat about Salsa, or you think you might like to
|
|
contribute, please jump on to our Zulip instance at
|
|
<a href="https://salsa.zulipchat.com/">salsa.zulipchat.com</a>.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="salsa-overview"><a class="header" href="#salsa-overview">Salsa overview</a></h1>
|
|
<blockquote>
|
|
<p>⚠️ <strong>IN-PROGRESS VERSION OF SALSA.</strong> ⚠️</p>
|
|
<p>This page describes the unreleased "Salsa 2022" version, which is a major departure from older versions of salsa. The code here works but is only available on github and from the <code>salsa-2022</code> crate.</p>
|
|
<p>If you are looking for the older version of Salsa, see the <a href="https://salsa-rs.netlify.app/salsa">documentation for the older version</a>.</p>
|
|
</blockquote>
|
|
<p>This page contains a brief overview of the pieces of a Salsa program.
|
|
For a more detailed look, check out the <a href="./tutorial.html">tutorial</a>, which walks through the creation of an entire project end-to-end.</p>
|
|
<h2 id="goal-of-salsa"><a class="header" href="#goal-of-salsa">Goal of Salsa</a></h2>
|
|
<p>The goal of Salsa is to support efficient <strong>incremental recomputation</strong>.
|
|
Salsa is used in rust-analyzer, for example, to help it recompile your program quickly as you type.</p>
|
|
<p>The basic idea of a Salsa program is like this:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>let mut input = ...;
|
|
loop {
|
|
let output = your_program(&input);
|
|
modify(&mut input);
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>You start out with an input that has some value.
|
|
You invoke your program to get back a result.
|
|
Some time later, you modify the input and invoke your program again.
|
|
<strong>Our goal is to make this second call faster by re-using some of the results from the first call.</strong></p>
|
|
<p>In reality, of course, you can have many inputs and "your program" may be many different methods and functions defined on those inputs.
|
|
But this picture still conveys a few important concepts:</p>
|
|
<ul>
|
|
<li>Salsa separates out the "incremental computation" (the function <code>your_program</code>) from some outer loop that is defining the inputs.</li>
|
|
<li>Salsa gives you the tools to define <code>your_program</code>.</li>
|
|
<li>Salsa assumes that <code>your_program</code> is a purely deterministic function of its inputs, or else this whole setup makes no sense.</li>
|
|
<li>The mutation of inputs always happens outside of <code>your_program</code>, as part of this master loop.</li>
|
|
</ul>
|
|
<h2 id="database"><a class="header" href="#database">Database</a></h2>
|
|
<p>Each time you run your program, Salsa remembers the values of each computation in a <strong>database</strong>.
|
|
When the inputs change, it consults this database to look for values that can be reused.
|
|
The database is also used to implement interning (making a canonical version of a value that can be copied around and cheaply compared for equality) and other convenient Salsa features.</p>
|
|
<h2 id="inputs"><a class="header" href="#inputs">Inputs</a></h2>
|
|
<p>Every Salsa program begins with an <strong>input</strong>.
|
|
Inputs are special structs that define the starting point of your program.
|
|
Everything else in your program is ultimately a deterministic function of these inputs.</p>
|
|
<p>For example, in a compiler, there might be an input defining the contents of a file on disk:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::input]
|
|
pub struct ProgramFile {
|
|
pub path: PathBuf,
|
|
pub contents: String,
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>You create an input by using the <code>new</code> method.
|
|
Because the values of input fields are stored in the database, you also give an <code>&</code>-reference to the database:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>let file: ProgramFile = ProgramFile::new(
|
|
&db,
|
|
PathBuf::from("some_path.txt"),
|
|
String::from("fn foo() { }"),
|
|
);
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>Mutable access is not needed since creating a new input cannot affect existing tracked data in the database.</p>
|
|
<h3 id="salsa-structs-are-just-integers"><a class="header" href="#salsa-structs-are-just-integers">Salsa structs are just integers</a></h3>
|
|
<p>The <code>ProgramFile</code> struct generated by the <code>salsa::input</code> macro doesn't actually store any data. It's just a newtyped integer id:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>// Generated by the `#[salsa::input]` macro:
|
|
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
|
pub struct ProgramFile(salsa::Id);
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>This means that, when you have a <code>ProgramFile</code>, you can easily copy it around and put it wherever you like.
|
|
To actually read any of its fields, however, you will need to use the database and a getter method.</p>
|
|
<h3 id="reading-fields-and-return_ref"><a class="header" href="#reading-fields-and-return_ref">Reading fields and <code>return_ref</code></a></h3>
|
|
<p>You can access the value of an input's fields by using the getter method.
|
|
As this is only reading the field, it just needs a <code>&</code>-reference to the database:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>let contents: String = file.contents(&db);
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>Invoking the accessor clones the value from the database.
|
|
Sometimes this is not what you want, so you can annotate fields with <code>#[return_ref]</code> to indicate that they should return a reference into the database instead:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::input]
|
|
pub struct ProgramFile {
|
|
pub path: PathBuf,
|
|
#[return_ref]
|
|
pub contents: String,
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>Now <code>file.contents(&db)</code> will return an <code>&String</code>.</p>
|
|
<p>You can also use the <code>data</code> method to access the entire struct:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>file.data(&db)
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<h3 id="writing-input-fields"><a class="header" href="#writing-input-fields">Writing input fields</a></h3>
|
|
<p>Finally, you can also modify the value of an input field by using the setter method.
|
|
Since this is modifying the input, and potentially invalidating data derived from it,
|
|
the setter takes an <code>&mut</code>-reference to the database:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>file.set_contents(&mut db).to(String::from("fn foo() { /* add a comment */ }"));
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>Note that the setter method <code>set_contents</code> returns a "builder".
|
|
This gives the ability to set the <a href="./reference/durability.html">durability</a> and other advanced concepts.</p>
|
|
<h2 id="tracked-functions"><a class="header" href="#tracked-functions">Tracked functions</a></h2>
|
|
<p>Once you've defined your inputs, the next things to define are <strong>tracked functions</strong>:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::tracked]
|
|
fn parse_file(db: &dyn crate::Db, file: ProgramFile) -> Ast {
|
|
let contents: &str = file.contents(db);
|
|
...
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>When you call a tracked function, Salsa will track which inputs it accesses (in this example, <code>file.contents(db)</code>).
|
|
It will also memoize the return value (the <code>Ast</code>, in this case).
|
|
If you call a tracked function twice, Salsa checks if the inputs have changed; if not, it can return the memoized value.
|
|
The algorithm Salsa uses to decide when a tracked function needs to be re-executed is called the <a href="./reference/algorithm.html">red-green algorithm</a>, and it's where the name Salsa comes from.</p>
|
|
<p>Tracked functions have to follow a particular structure:</p>
|
|
<ul>
|
|
<li>They must take a <code>&</code>-reference to the database as their first argument.
|
|
<ul>
|
|
<li>Note that because this is an <code>&</code>-reference, it is not possible to modify inputs during a tracked function!</li>
|
|
</ul>
|
|
</li>
|
|
<li>They must take a "Salsa struct" as the second argument -- in our example, this is an input struct, but there are other kinds of Salsa structs we'll describe shortly.</li>
|
|
<li>They <em>can</em> take additional arguments, but it's faster and better if they don't.</li>
|
|
</ul>
|
|
<p>Tracked functions can return any clone-able type. A clone is required since, when the value is cached, the result will be cloned out of the database. Tracked functions can also be annotated with <code>#[return_ref]</code> if you would prefer to return a reference into the database instead (if <code>parse_file</code> were so annotated, then callers would actually get back an <code>&Ast</code>, for example).</p>
|
|
<h2 id="tracked-structs"><a class="header" href="#tracked-structs">Tracked structs</a></h2>
|
|
<p><strong>Tracked structs</strong> are intermediate structs created during your computation.
|
|
Like inputs, their fields are stored inside the database, and the struct itself just wraps an id.
|
|
Unlike inputs, they can only be created inside a tracked function, and their fields can never change once they are created (until the next revision, at least).
|
|
Getter methods are provided to read the fields, but there are no setter methods.
|
|
Example:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::tracked]
|
|
struct Ast<'db> {
|
|
#[return_ref]
|
|
top_level_items: Vec&lt;Item&gt;,
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>Just as with an input, new values are created by invoking <code>Ast::new</code>.
|
|
Likewise, the <code>new</code> for a tracked struct only requires a <code>&</code>-reference to the database:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::tracked]
|
|
fn parse_file(db: &dyn crate::Db, file: ProgramFile) -> Ast {
|
|
let contents: &str = file.contents(db);
|
|
let parser = Parser::new(contents);
|
|
let mut top_level_items = vec![];
|
|
while let Some(item) = parser.parse_top_level_item() {
|
|
top_level_items.push(item);
|
|
}
|
|
Ast::new(db, top_level_items) // <-- create an Ast!
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<h3 id="id-fields"><a class="header" href="#id-fields"><code>#[id]</code> fields</a></h3>
|
|
<p>When a tracked function is re-executed because its inputs have changed, the tracked structs it creates in the new execution are matched against those from the old execution, and the values of their fields are compared.
|
|
If the field values have not changed, then other tracked functions that only read those fields will not be re-executed.</p>
|
|
<p>Normally, tracked structs are matched up by the order in which they are created.
|
|
For example, the first <code>Ast</code> that is created by <code>parse_file</code> in the old execution will be matched against the first <code>Ast</code> created by <code>parse_file</code> in the new execution.
|
|
In our example, <code>parse_file</code> only ever creates a single <code>Ast</code>, so this works great.
|
|
Sometimes, however, it doesn't work so well.
|
|
For example, imagine that we had a tracked struct for items in the file:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::tracked]
|
|
struct Item {
|
|
name: Word, // we'll define Word in a second!
|
|
...
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>Maybe our parser first creates an <code>Item</code> with the name <code>foo</code> and then later a second <code>Item</code> with the name <code>bar</code>.
|
|
Then the user changes the input to reorder the functions.
|
|
Although we are still creating the same number of items, we are now creating them in the reverse order, so the naive algorithm will match up the <em>old</em> <code>foo</code> struct with the new <code>bar</code> struct.
|
|
This will look to Salsa as though the <code>foo</code> function was renamed to <code>bar</code> and the <code>bar</code> function was renamed to <code>foo</code>.
|
|
We'll still get the right result, but we might do more recomputation than we needed to do if we understood that they were just reordered.</p>
|
|
<p>To address this, you can tag fields in a tracked struct as <code>#[id]</code>. These fields are then used to "match up" struct instances across executions:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::tracked]
|
|
struct Item {
|
|
#[id]
|
|
name: Word, // we'll define Word in a second!
|
|
...
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<h3 id="specify-the-result-of-tracked-functions-for-particular-structs"><a class="header" href="#specify-the-result-of-tracked-functions-for-particular-structs">Specify the result of tracked functions for particular structs</a></h3>
|
|
<p>Sometimes it is useful to define a tracked function but specify its value for some particular struct specially.
|
|
For example, maybe the default way to compute the representation for a function is to read the AST, but you also have some built-in functions in your language and you want to hard-code their results.
|
|
This can also be used to simulate a field that is initialized after the tracked struct is created.</p>
|
|
<p>To support this use case, you can use the <code>specify</code> method associated with tracked functions.
|
|
To enable this method, you need to add the <code>specify</code> flag to the function to alert users that its value may sometimes be specified externally.</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::tracked(specify)] // <-- specify flag required
|
|
fn representation(db: &dyn crate::Db, item: Item) -> Representation {
|
|
// read the user's input AST by default
|
|
let ast = ast(db, item);
|
|
// ...
|
|
}
|
|
|
|
fn create_builtin_item(db: &dyn crate::Db) -> Item {
|
|
let i = Item::new(db, ...);
|
|
let r = hardcoded_representation();
|
|
representation::specify(db, i, r); // <-- use the method!
|
|
i
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>Specifying is only possible for tracked functions that take a single tracked struct as an argument (besides the database).</p>
|
|
<h2 id="interned-structs"><a class="header" href="#interned-structs">Interned structs</a></h2>
|
|
<p>The final kind of Salsa struct is the <strong>interned struct</strong>.
|
|
Interned structs are useful for quick equality comparison.
|
|
They are commonly used to represent strings or other primitive values.</p>
|
|
<p>Most compilers, for example, will define a type to represent a user identifier:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::interned]
|
|
struct Word {
|
|
#[return_ref]
|
|
pub text: String,
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>As with input and tracked structs, the <code>Word</code> struct itself is just a newtyped integer, and the actual data is stored in the database.</p>
|
|
<p>You can create a new interned struct using <code>new</code>, just like with input and tracked structs:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>let w1 = Word::new(db, "foo".to_string());
|
|
let w2 = Word::new(db, "bar".to_string());
|
|
let w3 = Word::new(db, "foo".to_string());
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>When you create two interned structs with the same field values, you are guaranteed to get back the same integer id. So here, we know that both <code>assert_eq!(w1, w3)</code> and <code>assert_ne!(w1, w2)</code> hold.</p>
|
|
<p>You can access the fields of an interned struct using a getter, like <code>word.text(db)</code>. These getters respect the <code>#[return_ref]</code> annotation. Like tracked structs, the fields of interned structs are immutable.</p>
|
|
<h2 id="accumulators"><a class="header" href="#accumulators">Accumulators</a></h2>
|
|
<p>The final Salsa concept is the <strong>accumulator</strong>. Accumulators are a way to report errors or other "side channel" information that is separate from the main return value of your function.</p>
|
|
<p>To create an accumulator, you declare a type as an <em>accumulator</em>:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::accumulator]
|
|
pub struct Diagnostics(String);
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>It must be a newtype of something, like <code>String</code>. Now, during a tracked function's execution, you can push those values:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>Diagnostics::push(db, "some_string".to_string())
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>Then later, from outside the execution, you can ask for the set of diagnostics that were accumulated by some particular tracked function. For example, imagine that we have a type-checker and, during type-checking, it reports some diagnostics:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::tracked]
|
|
fn type_check(db: &dyn Db, item: Item) {
|
|
// ...
|
|
Diagnostics::push(db, "some error message".to_string())
|
|
// ...
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>We can then later invoke the associated <code>accumulated</code> function to get all the <code>String</code> values that were pushed:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>let v: Vec<String> = type_check::accumulated::<Diagnostics>(db);
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="tutorial-calc"><a class="header" href="#tutorial-calc">Tutorial: calc</a></h1>
|
|
<blockquote>
|
|
<p>⚠️ <strong>IN-PROGRESS VERSION OF SALSA.</strong> ⚠️</p>
|
|
<p>This page describes the unreleased "Salsa 2022" version, which is a major departure from older versions of salsa. The code here works but is only available on github and from the <code>salsa-2022</code> crate.</p>
|
|
<p>If you are looking for the older version of salsa, simply visit <a href="https://salsa-rs.netlify.app/salsa">this link</a>.</p>
|
|
</blockquote>
|
|
<p>This tutorial walks through an end-to-end example of using Salsa.
|
|
It does not assume you know anything about salsa,
|
|
but reading the <a href="./overview.html">overview</a> first is probably a good idea to get familiar with the basic concepts.</p>
|
|
<p>Our goal is to define a compiler/interpreter for a simple language called <code>calc</code>.
|
|
The <code>calc</code> compiler takes programs like the following and then parses and executes them:</p>
|
|
<pre><code>fn area_rectangle(w, h) = w * h
|
|
fn area_circle(r) = 3.14 * r * r
|
|
print area_rectangle(3, 4)
|
|
print area_circle(1)
|
|
print 11 * 2
|
|
</code></pre>
|
|
<p>When executed, this program prints <code>12</code>, <code>3.14</code>, and <code>22</code>.</p>
|
|
<p>If the program contains errors (e.g., a reference to an undefined function), it prints those out too.
|
|
And, of course, it will be reactive, so small changes to the input don't require recompiling (or re-executing, necessarily) the entire thing.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="basic-structure"><a class="header" href="#basic-structure">Basic structure</a></h1>
|
|
<p>Before we do anything with Salsa, let's talk about the basic structure of the calc compiler.
|
|
Part of Salsa's design is that you are able to write programs that feel 'pretty close' to what a natural Rust program looks like.</p>
|
|
<h2 id="example-program"><a class="header" href="#example-program">Example program</a></h2>
|
|
<p>This is our example calc program:</p>
|
|
<pre><code>x = 5
|
|
y = 10
|
|
z = x + y * 3
|
|
print z
|
|
</code></pre>
|
|
<h2 id="parser"><a class="header" href="#parser">Parser</a></h2>
|
|
<p>The calc compiler takes as input a program, represented by a string:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>struct ProgramSource {
|
|
text: String
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>The first thing it does is to parse that string into a series of statements that look something like the following pseudo-Rust:<sup class="footnote-reference"><a href="#lexer">1</a></sup></p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>enum Statement {
|
|
/// Defines `fn <name>(<args>) = <body>`
|
|
Function(Function),
|
|
/// Defines `print <expr>`
|
|
Print(Expression),
|
|
}
|
|
|
|
/// Defines `fn <name>(<args>) = <body>`
|
|
struct Function {
|
|
name: FunctionId,
|
|
args: Vec<VariableId>,
|
|
body: Expression
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>where an expression is something like this (pseudo-Rust, because the <code>Expression</code> enum is recursive):</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>enum Expression {
|
|
Op(Expression, Op, Expression),
|
|
Number(f64),
|
|
Variable(VariableId),
|
|
Call(FunctionId, Vec<Expression>),
|
|
}
|
|
|
|
enum Op {
|
|
Add,
|
|
Subtract,
|
|
Multiply,
|
|
Divide,
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>Finally, for function/variable names, the <code>FunctionId</code> and <code>VariableId</code> types will be interned strings:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>type FunctionId = /* interned string */;
|
|
type VariableId = /* interned string */;
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<div class="footnote-definition" id="lexer"><sup class="footnote-definition-label">1</sup>
|
|
<p>Because calc is so simple, we don't have to bother separating out the lexer from the parser.</p>
|
|
</div>
|
|
<h2 id="checker"><a class="header" href="#checker">Checker</a></h2>
|
|
<p>The "checker" has the job of ensuring that the user only references variables that have been defined.
|
|
We're going to write the checker in a "context-less" style,
|
|
which is a bit less intuitive but allows for more incremental re-use.
|
|
The idea is to compute, for a given expression, which variables it references.
|
|
Then there is a function <code>check</code> which ensures that those variables are a subset of those that are already defined.</p>
|
|
<h2 id="interpreter"><a class="header" href="#interpreter">Interpreter</a></h2>
|
|
<p>The interpreter will execute the program and print the result. We don't bother with much incremental re-use here,
|
|
though it's certainly possible.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="jars-and-databases"><a class="header" href="#jars-and-databases">Jars and databases</a></h1>
|
|
<p>Before we can define the interesting parts of our Salsa program, we have to set up a bit of structure that defines the Salsa <strong>database</strong>.
|
|
The database is a struct that ultimately stores all of Salsa's intermediate state, such as the memoized return values from <a href="tutorial/../overview.html#tracked-functions">tracked functions</a>.</p>
|
|
<p>The database itself is defined in terms of intermediate structures, called <strong>jars</strong><sup class="footnote-reference"><a href="#jar">1</a></sup>, which themselves contain the data for each function.
|
|
This setup allows Salsa programs to be divided amongst many crates.
|
|
Typically, you define one jar struct per crate, and then when you construct the final database, you simply list the jar structs.
|
|
This permits the crates to define private functions and other things that are members of the jar struct, but not known directly to the database.</p>
|
|
<div class="footnote-definition" id="jar"><sup class="footnote-definition-label">1</sup>
|
|
<p>Jars of salsa -- get it? Get it??<sup class="footnote-reference"><a href="#java">2</a></sup></p>
|
|
</div>
|
|
<div class="footnote-definition" id="java"><sup class="footnote-definition-label">2</sup>
|
|
<p>OK, maybe it also brings to mind Java <code>.jar</code> files, but there's no real relationship. A jar is just a Rust struct, not a packaging format.</p>
|
|
</div>
|
|
<h2 id="defining-a-jar-struct"><a class="header" href="#defining-a-jar-struct">Defining a jar struct</a></h2>
|
|
<p>To define a jar struct, you create a tuple struct with the <code>#[salsa::jar]</code> annotation:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::jar(db = Db)]
|
|
pub struct Jar(
|
|
crate::compile::compile,
|
|
crate::ir::SourceProgram,
|
|
crate::ir::Program<'_>,
|
|
crate::ir::VariableId<'_>,
|
|
crate::ir::FunctionId<'_>,
|
|
crate::ir::Function<'_>,
|
|
crate::ir::Diagnostics,
|
|
crate::ir::Span<'_>,
|
|
crate::parser::parse_statements,
|
|
crate::type_check::type_check_program,
|
|
crate::type_check::type_check_function,
|
|
crate::type_check::find_function,
|
|
);
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>Although it's not required, it's highly recommended to put the <code>jar</code> struct at the root of your crate, so that it can be referred to as <code>crate::Jar</code>.
|
|
All of the other Salsa annotations reference a jar struct, and they all default to the path <code>crate::Jar</code>.
|
|
If you put the jar somewhere else, you will have to override that default.</p>
|
|
<h2 id="defining-the-database-trait"><a class="header" href="#defining-the-database-trait">Defining the database trait</a></h2>
|
|
<p>The <code>#[salsa::jar]</code> annotation also includes a <code>db = Db</code> field.
|
|
The value of this field (normally <code>Db</code>) is the name of a trait that represents the database.
|
|
Salsa programs never refer <em>directly</em> to the database; instead, they take a <code>&dyn Db</code> argument.
|
|
This allows for separate compilation, where you have a database that contains the data for two jars, but those jars don't depend on one another.</p>
|
|
<p>The database trait for our <code>calc</code> crate is very simple:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>pub trait Db: salsa::DbWithJar<Jar> {}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>When you define a database trait like <code>Db</code>, the one thing that is required is that it must have a supertrait <code>salsa::DbWithJar<Jar></code>,
|
|
where <code>Jar</code> is the jar struct. If your jar depends on other jars, you can have multiple such supertraits (e.g., <code>salsa::DbWithJar<other_crate::Jar></code>).</p>
|
|
<p>Typically the <code>Db</code> trait has no other members or supertraits, but you are also free to add whatever other things you want in the trait.
|
|
When you define your final database, it will implement the trait, and you can then define the implementation of those other things.
|
|
This allows you to create a way for your jar to request context or other info from the database that is not mediated through Salsa,
|
|
should you need that.</p>
|
|
<h2 id="implementing-the-database-trait-for-the-jar"><a class="header" href="#implementing-the-database-trait-for-the-jar">Implementing the database trait for the jar</a></h2>
|
|
<p>The <code>Db</code> trait must be implemented by the database struct.
|
|
We're going to define the database struct in a <a href="tutorial/./db.html">later section</a>,
|
|
and one option would be to simply implement the jar <code>Db</code> trait there.
|
|
However, since we don't define any custom logic in the trait,
|
|
a common choice is to write a blanket impl for any type that implements <code>DbWithJar<Jar></code>,
|
|
and that's what we do here:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>impl<DB> Db for DB where DB: ?Sized + salsa::DbWithJar<Jar> {}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<h2 id="summary"><a class="header" href="#summary">Summary</a></h2>
|
|
<p>If the concept of a jar seems a bit abstract to you, don't overthink it. The TL;DR is that when you create a Salsa program, you need to perform the following steps:</p>
|
|
<ul>
|
|
<li>In each of your crates:
|
|
<ul>
|
|
<li>Define a <code>#[salsa::jar(db = Db)]</code> struct, typically at <code>crate::Jar</code>, and list each of your various Salsa-annotated things inside of it.</li>
|
|
<li>Define a <code>Db</code> trait, typically at <code>crate::Db</code>, that you will use in memoized functions and elsewhere to refer to the database struct.</li>
|
|
</ul>
|
|
</li>
|
|
<li>Once, typically in your final crate:
|
|
<ul>
|
|
<li>Define a database <code>D</code>, as described in the <a href="tutorial/./db.html">next section</a>, that will contain a list of each of the jars for each of your crates.</li>
|
|
<li>Implement the <code>Db</code> traits for each jar for your database type <code>D</code> (often we do this through blanket impls in the jar crates).</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="defining-the-database-struct"><a class="header" href="#defining-the-database-struct">Defining the database struct</a></h1>
|
|
<p>Now that we have defined a <a href="tutorial/./jar.html">jar</a>, we need to create the <strong>database struct</strong>.
|
|
The database struct is where all the jars come together.
|
|
Typically it is only used by the "driver" of your application;
|
|
the one which starts up the program, supplies the inputs, and relays the outputs.</p>
|
|
<p>In <code>calc</code>, the database struct is in the <a href="https://github.com/salsa-rs/salsa/blob/master/examples-2022/calc/src/db.rs"><code>db</code></a> module, and it looks like this:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[derive(Default)]
|
|
#[salsa::db(crate::Jar)]
|
|
pub(crate) struct Database {
|
|
storage: salsa::Storage<Self>,
|
|
|
|
// The logs are only used for testing and demonstrating reuse:
|
|
//
|
|
logs: Option<Arc<Mutex<Vec<String>>>>,
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>The <code>#[salsa::db(...)]</code> attribute takes a list of all the jars to include.
|
|
The struct must have a field named <code>storage</code> whose type is <code>salsa::Storage<Self></code>, but it can also contain whatever other fields you want.
|
|
The <code>storage</code> struct owns all the data for the jars listed in the <code>db</code> attribute.</p>
|
|
<p>The <code>salsa::db</code> attribute autogenerates a bunch of impls for things like the <code>salsa::HasJar<crate::Jar></code> trait that we saw earlier.</p>
|
|
<h2 id="implementing-the-salsadatabase-trait"><a class="header" href="#implementing-the-salsadatabase-trait">Implementing the <code>salsa::Database</code> trait</a></h2>
|
|
<p>In addition to the struct itself, we must add an impl of <code>salsa::Database</code>:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>impl salsa::Database for Database {
|
|
fn salsa_event(&self, event: salsa::Event) {
|
|
eprintln!("Event: {event:?}");
|
|
// Log interesting events, if logging is enabled
|
|
if let Some(logs) = &self.logs {
|
|
// don't log boring events
|
|
if let salsa::EventKind::WillExecute { .. } = event.kind {
|
|
logs.lock()
|
|
.unwrap()
|
|
.push(format!("Event: {:?}", event.debug(self)));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<h2 id="implementing-the-salsaparalleldatabase-trait"><a class="header" href="#implementing-the-salsaparalleldatabase-trait">Implementing the <code>salsa::ParallelDatabase</code> trait</a></h2>
|
|
<p>If you want to permit accessing your database from multiple threads at once, then you also need to implement the <code>ParallelDatabase</code> trait:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>impl salsa::ParallelDatabase for Database {
|
|
fn snapshot(&self) -> salsa::Snapshot<Self> {
|
|
salsa::Snapshot::new(Database {
|
|
storage: self.storage.snapshot(),
|
|
logs: self.logs.clone(),
|
|
})
|
|
}
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<h2 id="implementing-the-traits-for-each-jar"><a class="header" href="#implementing-the-traits-for-each-jar">Implementing the traits for each jar</a></h2>
|
|
<p>The <code>Database</code> struct also needs to implement the <a href="tutorial/./jar.html#defining-the-database-trait">database traits for each jar</a>.
|
|
In our case, though, we already wrote that impl as a <a href="tutorial/./jar.html#implementing-the-database-trait-for-the-jar">blanket impl alongside the jar itself</a>,
|
|
so no action is needed.
|
|
This is the recommended strategy unless your trait has custom members that depend on fields of the <code>Database</code> itself
|
|
(for example, sometimes the <code>Database</code> holds some kind of custom resource that you want to give access to).</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="defining-the-ir"><a class="header" href="#defining-the-ir">Defining the IR</a></h1>
|
|
<p>Before we can define the <a href="tutorial/./parser.html">parser</a>, we need to define the intermediate representation (IR) that we will use for <code>calc</code> programs.
|
|
In the <a href="tutorial/./structure.html">basic structure</a>, we defined some "pseudo-Rust" structures like <code>Statement</code> and <code>Expression</code>;
|
|
now we are going to define them for real.</p>
|
|
<h2 id="salsa-structs"><a class="header" href="#salsa-structs">"Salsa structs"</a></h2>
|
|
<p>In addition to regular Rust types, we will make use of various <strong>Salsa structs</strong>.
|
|
A Salsa struct is a struct that has been annotated with one of the Salsa annotations:</p>
|
|
<ul>
|
|
<li><a href="tutorial/ir.html#input-structs"><code>#[salsa::input]</code></a>, which designates the "base inputs" to your computation;</li>
|
|
<li><a href="tutorial/ir.html#tracked-structs"><code>#[salsa::tracked]</code></a>, which designates intermediate values created during your computation;</li>
|
|
<li><a href="tutorial/ir.html#interned-structs"><code>#[salsa::interned]</code></a>, which designates small values that are easy to compare for equality.</li>
|
|
</ul>
|
|
<p>All Salsa structs store the actual values of their fields in the Salsa database.
|
|
This permits us to track when the values of those fields change to figure out what work will need to be re-executed.</p>
|
|
<p>When you annotate a struct with one of the above Salsa attributes, Salsa actually generates a bunch of code to link that struct into the database.
|
|
This code must be connected to some <a href="tutorial/./jar.html">jar</a>.
|
|
By default, this is <code>crate::Jar</code>, but you can specify a different jar with the <code>jar=</code> attribute (e.g., <code>#[salsa::input(jar = MyJar)]</code>).
|
|
You must also list the struct in the jar definition itself, or you will get errors.</p>
|
|
<h2 id="input-structs"><a class="header" href="#input-structs">Input structs</a></h2>
|
|
<p>The first thing we will define is our <strong>input</strong>.
|
|
Every Salsa program has some basic inputs that drive the rest of the computation.
|
|
The rest of the program must be some deterministic function of those base inputs,
|
|
such that when those inputs change, we can try to efficiently recompute the new result of that function.</p>
|
|
<p>Inputs are defined as Rust structs with a <code>#[salsa::input]</code> annotation:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::input]
|
|
pub struct SourceProgram {
|
|
#[return_ref]
|
|
pub text: String,
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>In our compiler, we have just one simple input, the <code>SourceProgram</code>, which has a <code>text</code> field (the string).</p>
|
|
<h3 id="the-data-lives-in-the-database"><a class="header" href="#the-data-lives-in-the-database">The data lives in the database</a></h3>
|
|
<p>Although they are declared like other Rust structs, Salsa structs are implemented quite differently.
|
|
The values of their fields are stored in the Salsa database and the structs themselves just reference it.
|
|
This means that the struct instances are <code>Copy</code> (no matter what fields they contain).
|
|
Creating instances of the struct and accessing fields is done by invoking methods like <code>new</code> as well as getters and setters.</p>
|
|
<p>In the case of <code>#[salsa::input]</code>, the struct contains a <code>salsa::Id</code>, which is a non-zero integer.
|
|
Therefore, the generated <code>SourceProgram</code> struct looks something like this:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
|
pub struct SourceProgram(salsa::Id);
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>It will also generate a method <code>new</code> that lets you create a <code>SourceProgram</code> in the database.
|
|
For an input, a <code>&db</code> reference is required, along with the values for each field:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>let source = SourceProgram::new(&db, "print 11 + 11".to_string());
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>You can read the value of the field with <code>source.text(&db)</code>,
|
|
and you can set the value of the field with <code>source.set_text(&mut db, "print 11 * 2".to_string())</code>.</p>
|
|
<h3 id="database-revisions"><a class="header" href="#database-revisions">Database revisions</a></h3>
|
|
<p>Whenever a function takes an <code>&mut</code> reference to the database,
|
|
that means that it can only be invoked from outside the incrementalized part of your program,
|
|
as explained in <a href="tutorial/../overview.html#goal-of-salsa">the overview</a>.
|
|
When you change the value of an input field, that increments a 'revision counter' in the database,
|
|
indicating that some inputs are different now.
|
|
When we talk about a "revision" of the database, we are referring to the state of the database in between changes to the input values.</p>
|
|
<h3 id="representing-the-parsed-program"><a class="header" href="#representing-the-parsed-program">Representing the parsed program</a></h3>
|
|
<p>Next we will define a <strong>tracked struct</strong>.
|
|
Whereas inputs represent the <em>start</em> of a computation, tracked structs represent intermediate values created during your computation.</p>
|
|
<p>In this case, the parser is going to take in the <code>SourceProgram</code> struct that we saw and return a <code>Program</code> that represents the fully parsed program:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::tracked]
|
|
pub struct Program<'db> {
|
|
#[return_ref]
|
|
pub statements: Vec<Statement<'db>>,
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>Like with an input, the fields of a tracked struct are also stored in the database.
|
|
Unlike an input, those fields are immutable (they cannot be "set"), and Salsa compares them across revisions to know when they have changed.
|
|
In this case, if parsing the input produced the same <code>Program</code> result
|
|
(e.g., because the only change to the input was some trailing whitespace, perhaps),
|
|
then subsequent parts of the computation won't need to re-execute.
|
|
(We'll revisit the role of tracked structs in reuse more in future parts of the IR.)</p>
|
|
<p>Apart from the fields being immutable, the API for working with a tracked struct is quite similar to an input:</p>
|
|
<ul>
|
|
<li>You can create a new value by using <code>new</code>: e.g., <code>Program::new(&db, some_statements)</code></li>
|
|
<li>You use a getter to read the value of a field, just like with an input (e.g., <code>my_program.statements(db)</code> to read the <code>statements</code> field).
|
|
<ul>
|
|
<li>In this case, the field is tagged as <code>#[return_ref]</code>, which means that the getter will return a <code>&Vec<Statement></code>, instead of cloning the vector.</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
<h3 id="the-db-lifetime"><a class="header" href="#the-db-lifetime">The <code>'db</code> lifetime</a></h3>
|
|
<p>Unlike inputs, tracked structs carry a <code>'db</code> lifetime.
|
|
This lifetime is tied to the <code>&db</code> used to create them and
|
|
ensures that, so long as you are using the struct,
|
|
the database remains immutable:
|
|
in other words, you cannot change the field values of a <code>salsa::input</code> struct.</p>
|
|
<p>The <code>'db</code> lifetime also allows tracked structs to be implemented
|
|
using a pointer (versus the numeric id found in <code>salsa::input</code> structs).
|
|
This doesn't really affect you as a user except that it allows accessing fields from tracked structs—
|
|
a very common operation—to be optimized.</p>
|
|
<h2 id="representing-functions"><a class="header" href="#representing-functions">Representing functions</a></h2>
|
|
<p>We will also use a tracked struct to represent each function.
|
|
The <code>Function</code> struct is going to be created by the parser to represent each of the functions defined by the user:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::tracked]
|
|
pub struct Function<'db> {
|
|
#[id]
|
|
pub name: FunctionId<'db>,
|
|
|
|
name_span: Span<'db>,
|
|
|
|
#[return_ref]
|
|
pub args: Vec<VariableId<'db>>,
|
|
|
|
#[return_ref]
|
|
pub body: Expression<'db>,
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>If we had created some <code>Function</code> instance <code>f</code>, for example, we might find that the <code>f.body</code> field changes
|
|
because the user changed the definition of <code>f</code>.
|
|
This would mean that we have to re-execute those parts of the code that depended on <code>f.body</code>
|
|
(but not those parts of the code that depended on the body of <em>other</em> functions).</p>
|
|
<p>Apart from the fields being immutable, the API for working with a tracked struct is quite similar to an input:</p>
|
|
<ul>
|
|
<li>You can create a new value by using <code>new</code>: e.g., <code>Function::new(&db, some_name, some_name_span, some_args, some_body)</code></li>
|
|
<li>You use a getter to read the value of a field, just like with an input (e.g., <code>my_func.args(db)</code> to read the <code>args</code> field).</li>
|
|
</ul>
|
|
<h3 id="id-fields-1"><a class="header" href="#id-fields-1">id fields</a></h3>
|
|
<p>To get better reuse across revisions, particularly when things are reordered, you can mark some entity fields with <code>#[id]</code>.
|
|
Normally, you would do this on fields that represent the "name" of an entity.
|
|
This indicates that, across two revisions R1 and R2, if two functions are created with the same name, they refer to the same entity, so we can compare their other fields for equality to determine what needs to be re-executed.
|
|
Adding <code>#[id]</code> attributes is an optimization and never affects correctness.
|
|
For more details, see the <a href="tutorial/../reference/algorithm.html">algorithm</a> page of the reference.</p>
|
|
<h2 id="interned-structs-1"><a class="header" href="#interned-structs-1">Interned structs</a></h2>
|
|
<p>The final kind of Salsa struct is the <em>interned struct</em>.
|
|
As with input and tracked structs, the data for an interned struct is stored in the database.
|
|
Unlike those structs, if you intern the same data twice, you get back the <strong>same integer</strong>.</p>
|
|
<p>A classic use of interning is for small strings like function names and variables.
|
|
It's annoying and inefficient to pass around those names with <code>String</code> values which must be cloned;
|
|
it's also inefficient to have to compare them for equality via string comparison.
|
|
Therefore, we define two interned structs, <code>FunctionId</code> and <code>VariableId</code>, each with a single field that stores the string:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::interned]
|
|
pub struct VariableId<'db> {
|
|
#[return_ref]
|
|
pub text: String,
|
|
}
|
|
|
|
#[salsa::interned]
|
|
pub struct FunctionId<'db> {
|
|
#[return_ref]
|
|
pub text: String,
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>When you invoke e.g. <code>FunctionId::new(&db, "my_string".to_string())</code>, you will get back a <code>FunctionId</code> that is just a newtype'd integer.
|
|
But if you invoke the same call to <code>new</code> again, you get back the same integer:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>let f1 = FunctionId::new(&db, "my_string".to_string());
|
|
let f2 = FunctionId::new(&db, "my_string".to_string());
|
|
assert_eq!(f1, f2);
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<h3 id="interned-values-carry-a-db-lifetime"><a class="header" href="#interned-values-carry-a-db-lifetime">Interned values carry a <code>'db</code> lifetime</a></h3>
|
|
<p>Like tracked structs, interned values carry a <code>'db</code> lifetime that prevents them from being used across salsa revisions.
|
|
It also permits them to be implemented using a pointer "under the hood", permitting efficient field access.
|
|
Interned values are guaranteed to be consistent within a single revision.
|
|
Across revisions, they may be cleared, reallocated, or reassigned -- but you cannot generally observe this,
|
|
since the <code>'db</code> lifetime prevents you from changing inputs (and hence creating a new revision)
|
|
while an interned value is in active use.</p>
|
|
<h3 id="expressions-and-statements"><a class="header" href="#expressions-and-statements">Expressions and statements</a></h3>
|
|
<p>We won't use any special "Salsa structs" for expressions and statements:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[derive(Eq, PartialEq, Debug, Hash, new, salsa::Update, salsa::DebugWithDb)]
|
|
pub struct Statement<'db> {
|
|
pub span: Span<'db>,
|
|
|
|
pub data: StatementData<'db>,
|
|
}
|
|
|
|
#[derive(Eq, PartialEq, Debug, Hash, salsa::Update, salsa::DebugWithDb)]
|
|
pub enum StatementData<'db> {
|
|
/// Defines `fn <name>(<args>) = <body>`
|
|
Function(Function<'db>),
|
|
/// Defines `print <expr>`
|
|
Print(Expression<'db>),
|
|
}
|
|
|
|
#[derive(Eq, PartialEq, Debug, Hash, new, salsa::Update, salsa::DebugWithDb)]
|
|
pub struct Expression<'db> {
|
|
pub span: Span<'db>,
|
|
|
|
pub data: ExpressionData<'db>,
|
|
}
|
|
|
|
#[derive(Eq, PartialEq, Debug, Hash, salsa::Update, salsa::DebugWithDb)]
|
|
pub enum ExpressionData<'db> {
|
|
Op(Box<Expression<'db>>, Op, Box<Expression<'db>>),
|
|
Number(OrderedFloat<f64>),
|
|
Variable(VariableId<'db>),
|
|
Call(FunctionId<'db>, Vec<Expression<'db>>),
|
|
}
|
|
|
|
#[derive(Eq, PartialEq, Copy, Clone, Hash, Debug, salsa::Update, salsa::DebugWithDb)]
|
|
pub enum Op {
|
|
Add,
|
|
Subtract,
|
|
Multiply,
|
|
Divide,
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>Since statements and expressions are not tracked, this implies that we are only attempting to get incremental re-use at the granularity of functions --
|
|
whenever anything in a function body changes, we consider the entire function body dirty and re-execute anything that depended on it.
|
|
It usually makes sense to draw some kind of "reasonably coarse" boundary like this.</p>
|
|
<p>One downside of the way we have set things up: we inlined the position into each of the structs.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="defining-the-parser-memoized-functions-and-inputs"><a class="header" href="#defining-the-parser-memoized-functions-and-inputs">Defining the parser: memoized functions and inputs</a></h1>
|
|
<p>The next step in the <code>calc</code> compiler is to define the parser.
|
|
The role of the parser will be to take the <code>SourceProgram</code> input,
|
|
read the string from the <code>text</code> field,
|
|
and create the <code>Statement</code>, <code>Function</code>, and <code>Expression</code> structures that <a href="tutorial/./ir.html">we defined in the <code>ir</code> module</a>.</p>
|
|
<p>To minimize dependencies, we are going to write a <a href="https://en.wikipedia.org/wiki/Recursive_descent_parser">recursive descent parser</a>.
|
|
Another option would be to use a <a href="https://rustrepo.com/catalog/rust-parsing_newest_1">Rust parsing framework</a>.
|
|
We won't cover the parsing itself in this tutorial -- you can read the code if you want to see how it works.
|
|
We're going to focus only on the Salsa-related aspects.</p>
|
|
<h2 id="the-parse_statements-function"><a class="header" href="#the-parse_statements-function">The <code>parse_statements</code> function</a></h2>
|
|
<p>The starting point for the parser is the <code>parse_statements</code> function:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::tracked]
|
|
pub fn parse_statements<'db>(db: &'db dyn crate::Db, source: SourceProgram) -> Program<'db> {
|
|
// Get the source text from the database
|
|
let source_text = source.text(db);
|
|
|
|
// Create the parser
|
|
let mut parser = Parser {
|
|
db,
|
|
source_text,
|
|
position: 0,
|
|
};
|
|
|
|
// Read in statements until we reach the end of the input
|
|
let mut result = vec![];
|
|
loop {
|
|
// Skip over any whitespace
|
|
parser.skip_whitespace();
|
|
|
|
// If there are no more tokens, break
|
|
if parser.peek().is_none() {
|
|
break;
|
|
}
|
|
|
|
// Otherwise, there is more input, so parse a statement.
|
|
if let Some(statement) = parser.parse_statement() {
|
|
result.push(statement);
|
|
} else {
|
|
// If we failed, report an error at whatever position the parser
|
|
// got stuck. We could recover here by skipping to the end of the line
|
|
// or something like that. But we leave that as an exercise for the reader!
|
|
parser.report_error();
|
|
break;
|
|
}
|
|
}
|
|
|
|
Program::new(db, result)
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>This function is annotated as <code>#[salsa::tracked]</code>.
|
|
That means that, when it is called, Salsa will track what inputs it reads as well as what value it returns.
|
|
The return value is <em>memoized</em>,
|
|
which means that if you call this function again without changing the inputs,
|
|
Salsa will just clone the result rather than re-execute it.</p>
|
|
<h3 id="tracked-functions-are-the-unit-of-reuse"><a class="header" href="#tracked-functions-are-the-unit-of-reuse">Tracked functions are the unit of reuse</a></h3>
|
|
<p>Tracked functions are the core part of how Salsa enables incremental reuse.
|
|
The goal of the framework is to avoid re-executing tracked functions and instead to clone their result.
|
|
Salsa uses the <a href="tutorial/../reference/algorithm.html">red-green algorithm</a> to decide when to re-execute a function.
|
|
The short version is that a tracked function is re-executed if either (a) it directly reads an input, and that input has changed,
|
|
or (b) it directly invokes another tracked function and that function's return value has changed.
|
|
In the case of <code>parse_statements</code>, it directly reads <code>SourceProgram::text</code>, so if the text changes, then the parser will re-execute.</p>
|
|
<p>By choosing which functions to mark as <code>#[tracked]</code>, you control how much reuse you get.
|
|
In our case, we're opting to mark the outermost parsing function as tracked, but not the inner ones.
|
|
This means that if the input changes, we will always re-parse the entire input and re-create the resulting statements and so forth.
|
|
We'll see later that this <em>doesn't</em> mean we will always re-run the type checker and other parts of the compiler.</p>
|
|
<p>This trade-off makes sense because (a) parsing is very cheap, so the overhead of tracking and enabling finer-grained reuse doesn't pay off
|
|
and because (b) since strings are just a big blob-o-bytes without any structure, it's rather hard to identify which parts of the IR need to be reparsed.
|
|
Some systems do choose to do more granular reparsing, often by doing a "first pass" over the string to give it a bit of structure,
|
|
e.g. to identify the functions,
|
|
but deferring the parsing of the body of each function until later.
|
|
Setting up a scheme like this is relatively easy in Salsa and uses the same principles that we will use later to avoid re-executing the type checker.</p>
|
|
<h3 id="parameters-to-a-tracked-function"><a class="header" href="#parameters-to-a-tracked-function">Parameters to a tracked function</a></h3>
|
|
<p>The <strong>first</strong> parameter to a tracked function is <strong>always</strong> the database, <code>db: &dyn crate::Db</code>.
|
|
It must be a <code>dyn</code> value of whatever database is associated with the jar.</p>
|
|
<p>The <strong>second</strong> parameter to a tracked function is <strong>always</strong> some kind of Salsa struct.
|
|
The first parameter to a memoized function is always the database,
|
|
which should be a <code>dyn Trait</code> value for the database trait associated with the jar
|
|
(the default jar is <code>crate::Jar</code>).</p>
|
|
<p>Tracked functions may take other arguments as well, though our examples here do not.
|
|
Functions that take additional arguments are less efficient and flexible.
|
|
It's generally better to structure tracked functions as functions of a single Salsa struct if possible.</p>
|
|
<h3 id="the-return_ref-annotation"><a class="header" href="#the-return_ref-annotation">The <code>return_ref</code> annotation</a></h3>
|
|
<p>You may have noticed that <code>parse_statements</code> is tagged with <code>#[salsa::tracked(return_ref)]</code>.
|
|
Ordinarily, when you call a tracked function, the result you get back is cloned out of the database.
|
|
The <code>return_ref</code> attribute means that a reference into the database is returned instead.
|
|
So, when called, <code>parse_statements</code> will return an <code>&amp;Vec&lt;Statement&gt;</code> rather than cloning the <code>Vec</code>.
|
|
This is useful as a performance optimization.
|
|
(You may recall the <code>return_ref</code> annotation from the <a href="tutorial/./ir.html">ir</a> section of the tutorial,
|
|
where it was placed on struct fields, with roughly the same meaning.)</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="defining-the-parser-reporting-errors"><a class="header" href="#defining-the-parser-reporting-errors">Defining the parser: reporting errors</a></h1>
|
|
<p>The last interesting case in the parser is how to handle a parse error.
|
|
Because Salsa functions are memoized and may not execute, they should not have side-effects,
|
|
so we don't just want to call <code>eprintln!</code>.
|
|
If we did so, the error would only be reported the first time the function was called, but not
|
|
on subsequent calls in the situation where the function simply returns its memoized value.</p>
|
|
<p>Salsa defines a mechanism for managing this called an <strong>accumulator</strong>.
|
|
In our case, we define an accumulator struct called <code>Diagnostics</code> in the <code>ir</code> module:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::accumulator]
|
|
pub struct Diagnostics(Diagnostic);
|
|
|
|
#[derive(new, Clone, Debug)]
|
|
pub struct Diagnostic {
|
|
pub start: usize,
|
|
pub end: usize,
|
|
pub message: String,
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>Accumulator structs are always newtype structs with a single field, in this case of type <code>Diagnostic</code>.
|
|
Memoized functions can <em>push</em> <code>Diagnostic</code> values onto the accumulator.
|
|
Later, you can invoke a method to find all the values that were pushed by the memoized functions
|
|
or any functions that they called
|
|
(e.g., we could get the set of <code>Diagnostic</code> values produced by the <code>parse_statements</code> function).</p>
|
|
<p>The <code>Parser::report_error</code> method contains an example of pushing a diagnostic:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span> /// Report an error diagnostic at the current position.
|
|
fn report_error(&self) {
|
|
let next_position = match self.peek() {
|
|
Some(ch) => self.position + ch.len_utf8(),
|
|
None => self.position,
|
|
};
|
|
Diagnostics::push(
|
|
self.db,
|
|
Diagnostic {
|
|
start: self.position,
|
|
end: next_position,
|
|
message: "unexpected character".to_string(),
|
|
},
|
|
);
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>To get the set of diagnostics produced by <code>parse_statements</code>, or any other memoized function,
|
|
we invoke the associated <code>accumulated</code> function:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>let accumulated: Vec<Diagnostic> =
|
|
parse_statements::accumulated::<Diagnostics>(db);
|
|
// -----------
|
|
// Use turbofish to specify
|
|
// the diagnostics type.
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p><code>accumulated</code> takes the database <code>db</code> as argument and returns a <code>Vec</code>.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="defining-the-parser-debug-impls-and-testing"><a class="header" href="#defining-the-parser-debug-impls-and-testing">Defining the parser: debug impls and testing</a></h1>
|
|
<p>As the final part of the parser, we need to write some tests.
|
|
To do so, we will create a database, set the input source text, run the parser, and check the result.
|
|
Before we can do that, though, we have to address one question: how do we inspect the value of an interned type like <code>Expression</code>?</p>
|
|
<h2 id="the-debugwithdb-trait"><a class="header" href="#the-debugwithdb-trait">The <code>DebugWithDb</code> trait</a></h2>
|
|
<p>Because an interned type like <code>Expression</code> just stores an integer, the traditional <code>Debug</code> trait is not very useful.
|
|
To properly print an <code>Expression</code>, you need to access the Salsa database to find out what its value is.
|
|
To solve this, <code>salsa</code> provides a <code>DebugWithDb</code> trait that acts like the regular <code>Debug</code>, but takes a database as argument.
|
|
For types that implement this trait, you can invoke the <code>debug</code> method.
|
|
This returns a temporary that implements the ordinary <code>Debug</code> trait, allowing you to write something like</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>eprintln!("Expression = {:?}", expr.debug(db));
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>and get back the output you expect.</p>
|
|
<p>The <code>DebugWithDb</code> trait is automatically derived for all <code>#[input]</code>, <code>#[interned]</code>, and <code>#[tracked]</code> structs.</p>
|
|
<h2 id="forwarding-to-the-ordinary-debug-trait"><a class="header" href="#forwarding-to-the-ordinary-debug-trait">Forwarding to the ordinary <code>Debug</code> trait</a></h2>
|
|
<p>For consistency, it is sometimes useful to have a <code>DebugWithDb</code> implementation even for types, like <code>Op</code>, that are just ordinary enums. You can do that like so:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span>
|
|
<span class="boring">fn main() {
|
|
</span><span class="boring">}
|
|
</span></code></pre></pre>
|
|
<h2 id="writing-the-unit-test"><a class="header" href="#writing-the-unit-test">Writing the unit test</a></h2>
|
|
<p>Now that we have our <code>DebugWithDb</code> impls in place, we can write a simple unit test harness.
|
|
The <code>parse_string</code> function below creates a database, sets the source text, and then invokes the parser:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>/// Create a new database with the given source text and parse the result.
|
|
/// Returns the statements and the diagnostics generated.
|
|
#[cfg(test)]
|
|
fn parse_string(source_text: &str) -> String {
|
|
use salsa::debug::DebugWithDb;
|
|
|
|
// Create the database
|
|
let db = crate::db::Database::default();
|
|
|
|
// Create the source program
|
|
let source_program = SourceProgram::new(&db, source_text.to_string());
|
|
|
|
// Invoke the parser
|
|
let statements = parse_statements(&db, source_program);
|
|
|
|
// Read out any diagnostics
|
|
let accumulated = parse_statements::accumulated::<Diagnostics>(&db, source_program);
|
|
|
|
// Format the result as a string and return it
|
|
format!("{:#?}", (statements.debug(&db), accumulated))
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>Combined with the <a href="https://crates.io/crates/expect-test"><code>expect-test</code></a> crate, we can then write unit tests like this one:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[test]
|
|
fn parse_print() {
|
|
let actual = parse_string("print 1 + 2");
|
|
let expected = expect_test::expect![[r#"
|
|
(
|
|
Program {
|
|
[salsa id]: 0,
|
|
statements: [
|
|
Statement {
|
|
span: Span {
|
|
[salsa id]: 4,
|
|
start: 0,
|
|
end: 11,
|
|
},
|
|
data: Print(
|
|
Expression {
|
|
span: Span {
|
|
[salsa id]: 3,
|
|
start: 6,
|
|
end: 11,
|
|
},
|
|
data: Op(
|
|
Expression {
|
|
span: Span {
|
|
[salsa id]: 0,
|
|
start: 6,
|
|
end: 7,
|
|
},
|
|
data: Number(
|
|
OrderedFloat(
|
|
1.0,
|
|
),
|
|
),
|
|
},
|
|
Add,
|
|
Expression {
|
|
span: Span {
|
|
[salsa id]: 2,
|
|
start: 10,
|
|
end: 11,
|
|
},
|
|
data: Number(
|
|
OrderedFloat(
|
|
2.0,
|
|
),
|
|
),
|
|
},
|
|
),
|
|
},
|
|
),
|
|
},
|
|
],
|
|
},
|
|
[],
|
|
)"#]];
|
|
expected.assert_eq(&actual);
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="defining-the-checker"><a class="header" href="#defining-the-checker">Defining the checker</a></h1>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="defining-the-interpreter"><a class="header" href="#defining-the-interpreter">Defining the interpreter</a></h1>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="reference"><a class="header" href="#reference">Reference</a></h1>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="durability"><a class="header" href="#durability">Durability</a></h1>
|
|
<p>"Durability" is an optimization that can greatly improve the performance of your salsa programs.
|
|
Durability specifies the probability that an input's value will change.
|
|
The default is "low durability".
|
|
But when you set the value of an input, you can manually specify a higher durability,
|
|
typically <code>Durability::HIGH</code>.
|
|
Salsa tracks when tracked functions only consume values of high durability
|
|
and, if no high durability input has changed, it can skip traversing their
|
|
dependencies.</p>
|
|
<p>Typically "high durability" values are things like data read from the standard library
|
|
or other inputs that aren't actively being edited by the end user.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="the-red-green-algorithm"><a class="header" href="#the-red-green-algorithm">The "red-green" algorithm</a></h1>
|
|
<p>This page explains the basic Salsa incremental algorithm.
|
|
The algorithm is called the "red-green" algorithm, which is where the name Salsa comes from.</p>
|
|
<h3 id="database-revisions-1"><a class="header" href="#database-revisions-1">Database revisions</a></h3>
|
|
<p>The Salsa database always tracks a single <strong>revision</strong>. Each time you set an input, the revision is incremented. So we start in revision <code>R1</code>, but when a <code>set</code> method is called, we will go to <code>R2</code>, then <code>R3</code>, and so on. For each input, we also track the revision in which it was last changed.</p>
|
|
<h3 id="basic-rule-when-inputs-change-re-execute"><a class="header" href="#basic-rule-when-inputs-change-re-execute">Basic rule: when inputs change, re-execute!</a></h3>
|
|
<p>When you invoke a tracked function, in addition to storing the value that was returned, we also track what <em>other</em> tracked functions it depends on, and the revisions when their value last changed. When you invoke the function again, if the database is in a new revision, then we check whether any of the inputs to this function have changed in that new revision. If not, we can just return our cached value. But if the inputs <em>have</em> changed (or may have changed), we will re-execute the function to find the most up-to-date answer.</p>
|
|
<p>Here is a simple example, where the <code>parse_module</code> function invokes the <code>module_text</code> function:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::tracked]
|
|
fn parse_module(db: &dyn Db, module: Module) -> Ast {
|
|
let module_text: &String = module_text(db, module);
|
|
Ast::parse_text(module_text)
|
|
}
|
|
|
|
#[salsa::tracked(return_ref)]
|
|
fn module_text(db: &dyn Db, module: Module) -> String {
|
|
panic!("text for module `{module:?}` not set")
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>If we invoke <code>parse_module</code> twice, but change the module text in between, then we will have to re-execute <code>parse_module</code>:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>module_text::set(
|
|
db,
|
|
module,
|
|
"fn foo() { }".to_string(),
|
|
);
|
|
parse_module(db, module); // executes
|
|
|
|
// ...some time later...
|
|
|
|
module_text::set(
|
|
db,
|
|
module,
|
|
"fn foo() { /* add a comment */ }".to_string(),
|
|
);
|
|
parse_module(db, module); // executes again!
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<h3 id="backdating-sometimes-we-can-be-smarter"><a class="header" href="#backdating-sometimes-we-can-be-smarter">Backdating: sometimes we can be smarter</a></h3>
|
|
<p>Often, though, tracked functions don't depend directly on the inputs. Instead, they'll depend on some other tracked function. For example, perhaps we have a <code>type_check</code> function that reads the AST:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::tracked]
|
|
fn type_check(db: &dyn Db, module: Module) {
|
|
let ast = parse_module(db, module);
|
|
...
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>If the module text is changed, we saw that we have to re-execute <code>parse_module</code>, but there are many changes to the source text that still produce the same AST. For example, suppose we simply add a comment? In that case, if <code>type_check</code> is called again, we will:</p>
|
|
<ul>
|
|
<li>First re-execute <code>parse_module</code>, since its input changed.</li>
|
|
<li>We will then compare the resulting AST. If it's the same as last time, we can <em>backdate</em> the result, meaning that we say that, even though the inputs changed, the output didn't.</li>
|
|
</ul>
|
|
<h2 id="durability-an-optimization"><a class="header" href="#durability-an-optimization">Durability: an optimization</a></h2>
|
|
<p>As an optimization, Salsa includes the concept of <strong>durability</strong>, which is the notion of how often some piece of tracked data changes. </p>
|
|
<p>For example, when compiling a Rust program, you might mark the inputs from crates.io as <em>high durability</em> inputs, since they are unlikely to change. The current workspace could be marked as <em>low durability</em>, since changes to it are happening all the time.</p>
|
|
<p>When you set the value of a tracked function, you can also set it with a given <em>durability</em>:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>module_text::set_with_durability(
|
|
db,
|
|
module,
|
|
"fn foo() { }".to_string(),
|
|
salsa::Durability::HIGH
|
|
);
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>For each durability, we track the revision in which <em>some input</em> with that durability changed. If a tracked function depends (transitively) only on high durability inputs, and you change a low durability input, then we can very easily determine that the tracked function result is still valid, avoiding the need to traverse the input edges one by one.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="common-patterns"><a class="header" href="#common-patterns">Common patterns</a></h1>
|
|
<p>This section documents patterns for using Salsa.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="selection"><a class="header" href="#selection">Selection</a></h1>
|
|
<p>The "selection" (or "firewall") pattern is when you have a query Qsel that reads from some
|
|
other Qbase and extracts some small bit of information from Qbase that it returns.
|
|
In particular, Qsel does not combine values from other queries. In some sense,
|
|
then, Qsel is redundant -- you could have just extracted the information
|
|
from Qbase yourself, and done without the salsa machinery. But
|
|
Qsel serves a role in that it limits the amount of re-execution that is required
|
|
when Qbase changes.</p>
|
|
<h2 id="example-the-base-query"><a class="header" href="#example-the-base-query">Example: the base query</a></h2>
|
|
<p>For example, imagine that you have a query <code>parse</code> that parses the input text of a request
|
|
and returns a <code>ParsedResult</code>, which contains a header and a body:</p>
|
|
<pre><code class="language-rust ignore">#[derive(Clone, Debug, PartialEq, Eq)]
|
|
struct ParsedResult {
|
|
header: Vec<ParsedHeader>,
|
|
body: String,
|
|
}
|
|
|
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
|
struct ParsedHeader {
|
|
key: String,
|
|
value: String,
|
|
}
|
|
|
|
#[salsa::query_group(Request)]
|
|
trait RequestParser {
|
|
/// The base text of the request.
|
|
#[salsa::input]
|
|
fn request_text(&self) -> String;
|
|
|
|
/// The parsed form of the request.
|
|
fn parse(&self) -> ParsedResult;
|
|
}
|
|
</code></pre>
|
|
<h2 id="example-a-selecting-query"><a class="header" href="#example-a-selecting-query">Example: a selecting query</a></h2>
|
|
<p>And now you have a number of derived queries that only look at the header.
|
|
For example, one might extract the "content-type" header:</p>
|
|
<pre><code class="language-rust ignore">#[salsa::query_group(Request)]
|
|
trait RequestUtil: RequestParser {
|
|
fn content_type(&self) -> Option<String>;
|
|
}
|
|
|
|
fn content_type(db: &dyn RequestUtil) -> Option<String> {
|
|
db.parse()
|
|
.header
|
|
.iter()
|
|
.find(|header| header.key == "content-type")
|
|
.map(|header| header.value.clone())
|
|
}
|
|
</code></pre>
|
|
<h2 id="why-prefer-a-selecting-query"><a class="header" href="#why-prefer-a-selecting-query">Why prefer a selecting query?</a></h2>
|
|
<p>This <code>content_type</code> query is an instance of the <em>selection</em> pattern. It only
|
|
"selects" a small bit of information from the <code>ParsedResult</code>. You might not have
|
|
made it a query at all, but instead made it a method on <code>ParsedResult</code>.</p>
|
|
<p>But using a query for <code>content_type</code> has an advantage: now if there are downstream
|
|
queries that only depend on the <code>content_type</code> (or perhaps on other headers extracted
|
|
via a similar pattern), those queries will not have to be re-executed when the request
|
|
changes <em>unless</em> the content-type header changes. Consider the dependency graph:</p>
|
|
<pre><code class="language-text">request_text --> parse --> content_type --> (other queries)
|
|
</code></pre>
|
|
<p>When the <code>request_text</code> changes, we are always going to have to re-execute <code>parse</code>.
|
|
If that produces a new parsed result, we are <em>also</em> going to re-execute <code>content_type</code>.
|
|
But if the result of <code>content_type</code> has not changed, then we will <em>not</em> re-execute
|
|
the other queries.</p>
|
|
<h2 id="more-levels-of-selection"><a class="header" href="#more-levels-of-selection">More levels of selection</a></h2>
|
|
<p>In fact, in our example we might consider introducing another level of selection.
|
|
Instead of having <code>content_type</code> directly access the results of <code>parse</code>, it might be better
|
|
to insert a selecting query that just extracts the header:</p>
|
|
<pre><code class="language-rust ignore">#[salsa::query_group(Request)]
|
|
trait RequestUtil: RequestParser {
|
|
fn header(&self) -> Vec<ParsedHeader>;
|
|
fn content_type(&self) -> Option<String>;
|
|
}
|
|
|
|
fn header(db: &dyn RequestUtil) -> Vec<ParsedHeader> {
|
|
db.parse().header
|
|
}
|
|
|
|
fn content_type(db: &dyn RequestUtil) -> Option<String> {
|
|
db.header()
|
|
.iter()
|
|
.find(|header| header.key == "content-type")
|
|
.map(|header| header.value.clone())
|
|
}
|
|
</code></pre>
|
|
<p>This will result in a dependency graph like so:</p>
|
|
<pre><code class="language-text">request_text --> parse --> header --> content_type --> (other queries)
|
|
</code></pre>
|
|
<p>The advantage of this is that changes that only affect the "body" or
|
|
only consume small parts of the request will
|
|
not require us to re-execute <code>content_type</code> at all. This would be particularly
|
|
valuable if there are a lot of dependent headers.</p>
|
|
<h2 id="a-note-on-cloning-and-efficiency"><a class="header" href="#a-note-on-cloning-and-efficiency">A note on cloning and efficiency</a></h2>
|
|
<p>In this example, we used common Rust types like <code>Vec</code> and <code>String</code>,
|
|
and we cloned them quite frequently. This will work just fine in Salsa,
|
|
but it may not be the most efficient choice. This is because each clone
|
|
is going to produce a deep copy of the result. As a simple fix, you
|
|
might convert your data structures to use <code>Arc</code> (e.g., <code>Arc&lt;Vec&lt;ParsedHeader&gt;&gt;</code>),
|
|
which makes cloning cheap.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="on-demand-lazy-inputs"><a class="header" href="#on-demand-lazy-inputs">On-Demand (Lazy) Inputs</a></h1>
|
|
<blockquote>
|
|
<p>⚠️ <strong>IN-PROGRESS VERSION OF SALSA.</strong> ⚠️</p>
|
|
<p>This page describes the unreleased "Salsa 2022" version, which is a major departure from older versions of salsa. The code here works but is only available on github and from the <code>salsa-2022</code> crate.</p>
|
|
<p>If you are looking for the older version of salsa, simply visit <a href="https://salsa-rs.netlify.app/salsa">this link</a></p>
|
|
</blockquote>
|
|
<p>Salsa inputs work best if you can easily provide all of the inputs upfront.
|
|
However sometimes the set of inputs is not known beforehand.</p>
|
|
<p>A typical example is reading files from disk.
|
|
While it is possible to eagerly scan a particular directory and create an in-memory file tree as salsa input structs, a more straightforward approach is to read the files lazily.
|
|
That is, when a query requests the text of a file for the first time:</p>
|
|
<ol>
|
|
<li>Read the file from disk and cache it.</li>
|
|
<li>Set up a file-system watcher for this path.</li>
|
|
<li>Update the cached file when the watcher sends a change notification.</li>
|
|
</ol>
|
|
<p>This is possible to achieve in salsa, by caching the inputs in your database structs and adding a method to the database trait to retrieve them out of this cache.</p>
|
|
<p>A complete, runnable file-watching example can be found in <a href="https://github.com/salsa-rs/salsa/tree/master/examples-2022/lazy-input">the lazy-input example</a>.</p>
|
|
<p>The setup looks roughly like this:</p>
|
|
<pre><code class="language-rust ignore">#[salsa::input]
|
|
struct File {
|
|
path: PathBuf,
|
|
#[return_ref]
|
|
contents: String,
|
|
}
|
|
|
|
trait Db: salsa::DbWithJar<Jar> {
|
|
fn input(&self, path: PathBuf) -> Result<File>;
|
|
}
|
|
|
|
#[salsa::db(Jar)]
|
|
struct Database {
|
|
storage: salsa::Storage<Self>,
|
|
logs: Mutex<Vec<String>>,
|
|
files: DashMap<PathBuf, File>,
|
|
file_watcher: Mutex<Debouncer<RecommendedWatcher>>,
|
|
}
|
|
|
|
impl Database {
|
|
fn new(tx: Sender<DebounceEventResult>) -> Self {
|
|
let storage = Default::default();
|
|
Self {
|
|
storage,
|
|
logs: Default::default(),
|
|
files: DashMap::new(),
|
|
file_watcher: Mutex::new(new_debouncer(Duration::from_secs(1), None, tx).unwrap()),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Db for Database {
|
|
fn input(&self, path: PathBuf) -> Result<File> {
|
|
let path = path
|
|
.canonicalize()
|
|
.wrap_err_with(|| format!("Failed to read {}", path.display()))?;
|
|
Ok(match self.files.entry(path.clone()) {
|
|
// If the file already exists in our cache then just return it.
|
|
Entry::Occupied(entry) => *entry.get(),
|
|
// If we haven't read this file yet set up the watch, read the
|
|
// contents, store it in the cache, and return it.
|
|
Entry::Vacant(entry) => {
|
|
// Set up the watch before reading the contents to try to avoid
|
|
// race conditions.
|
|
let watcher = &mut *self.file_watcher.lock().unwrap();
|
|
watcher
|
|
.watcher()
|
|
.watch(&path, RecursiveMode::NonRecursive)
|
|
.unwrap();
|
|
let contents = std::fs::read_to_string(&path)
|
|
.wrap_err_with(|| format!("Failed to read {}", path.display()))?;
|
|
*entry.insert(File::new(self, path, contents))
|
|
}
|
|
})
|
|
}
|
|
}
|
|
</code></pre>
|
|
<ul>
|
|
<li>We declare a method on the <code>Db</code> trait that gives us a <code>File</code> input on-demand (it only requires a <code>&dyn Db</code> not a <code>&mut dyn Db</code>).</li>
|
|
<li>There should only be one input struct per file, so we implement that method using a cache (<code>DashMap</code> is like a <code>RwLock<HashMap></code>).</li>
|
|
</ul>
|
|
<p>The driving code that's doing the top-level queries is then in charge of updating the file contents when a file-change notification arrives.
|
|
It does this by updating the Salsa input in the same way that you would update any other input.</p>
|
|
<p>Here we implement a simple driving loop that recompiles the code whenever a file changes.
|
|
You can use the logs to check that only the queries that could have changed are re-evaluated.</p>
|
|
<pre><code class="language-rust ignore">fn main() -> Result<()> {
|
|
// Create the channel to receive file change events.
|
|
let (tx, rx) = unbounded();
|
|
let mut db = Database::new(tx);
|
|
|
|
let initial_file_path = std::env::args_os()
|
|
.nth(1)
|
|
.ok_or_else(|| eyre!("Usage: ./lazy-input <input-file>"))?;
|
|
|
|
// Create the initial input using the input method so that changes to it
|
|
// will be watched like the other files.
|
|
let initial = db.input(initial_file_path.into())?;
|
|
loop {
|
|
// Compile the code starting at the provided input, this will read other
|
|
// needed files using the on-demand mechanism.
|
|
let sum = compile(&db, initial);
|
|
let diagnostics = compile::accumulated::<Diagnostic>(&db, initial);
|
|
if diagnostics.is_empty() {
|
|
println!("Sum is: {}", sum);
|
|
} else {
|
|
for diagnostic in diagnostics {
|
|
println!("{}", diagnostic);
|
|
}
|
|
}
|
|
|
|
for log in db.logs.lock().unwrap().drain(..) {
|
|
eprintln!("{}", log);
|
|
}
|
|
|
|
// Wait for file change events, the output can't change unless the
|
|
// inputs change.
|
|
for event in rx.recv()?.unwrap() {
|
|
let path = event.path.canonicalize().wrap_err_with(|| {
|
|
format!("Failed to canonicalize path {}", event.path.display())
|
|
})?;
|
|
let file = match db.files.get(&path) {
|
|
Some(file) => *file,
|
|
None => continue,
|
|
};
|
|
// `path` has changed, so read it and update the contents to match.
|
|
// This creates a new revision and causes the incremental algorithm
|
|
// to kick in, just like any other update to a salsa input.
|
|
let contents = std::fs::read_to_string(path)
|
|
.wrap_err_with(|| format!("Failed to read file {}", event.path.display()))?;
|
|
file.set_contents(&mut db).to(contents);
|
|
}
|
|
}
|
|
}
|
|
</code></pre>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="tuning-salsa"><a class="header" href="#tuning-salsa">Tuning Salsa</a></h1>
|
|
<h2 id="lru-cache"><a class="header" href="#lru-cache">LRU Cache</a></h2>
|
|
<p>You can specify an LRU cache size for any non-input query:</p>
|
|
<pre><code class="language-rs">let lru_capacity: usize = 128;
|
|
base_db::ParseQuery.in_db_mut(self).set_lru_capacity(lru_capacity);
|
|
</code></pre>
|
|
<p>The default is <code>0</code>, which disables LRU-caching entirely.</p>
|
|
<p>Note that there is no garbage collection for keys and
|
|
results of old queries, so LRU caches are currently the
|
|
only knob available for avoiding unbounded memory usage
|
|
for long-running apps built on Salsa.</p>
|
|
<h2 id="intern-queries"><a class="header" href="#intern-queries">Intern Queries</a></h2>
|
|
<p>Intern queries can make key lookup cheaper, save memory, and
|
|
avoid the need for <a href="https://doc.rust-lang.org/std/sync/struct.Arc.html"><code>Arc</code></a>.</p>
|
|
<p>Interning is especially useful for queries that involve nested,
|
|
tree-like data structures.</p>
|
|
<p>See:</p>
|
|
<ul>
|
|
<li>The <a href="https://github.com/salsa-rs/salsa/blob/master/examples/compiler/main.rs"><code>compiler</code> example</a>,
|
|
which uses interning.</li>
|
|
</ul>
|
|
<h2 id="granularity-of-incrementality"><a class="header" href="#granularity-of-incrementality">Granularity of Incrementality</a></h2>
|
|
<p>See:</p>
|
|
<ul>
|
|
<li><a href="./common_patterns/selection.html">common patterns: selection</a> and</li>
|
|
<li>The <a href="https://github.com/salsa-rs/salsa/blob/master/examples/selection/main.rs"><code>selection</code> example</a></li>
|
|
</ul>
|
|
<h2 id="cancellation"><a class="header" href="#cancellation">Cancellation</a></h2>
|
|
<p>Queries that are no longer needed due to concurrent writes or changes in dependencies are cancelled
|
|
by Salsa. Each access of an intermediate query is a potential cancellation point. Cancellation is
|
|
implemented via panicking, and Salsa internals are intended to be panic-safe.</p>
|
|
<p>If you have a query that contains a long loop which does not execute any intermediate queries,
|
|
salsa won't be able to cancel it automatically. You may wish to check for cancellation yourself
|
|
by invoking <code>db.unwind_if_cancelled()</code>.</p>
|
|
<p>For more details on cancellation, see the tests for cancellation behavior in the Salsa repo.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="cycle-handling"><a class="header" href="#cycle-handling">Cycle handling</a></h1>
|
|
<p>By default, when Salsa detects a cycle in the computation graph, Salsa will panic with a <a href="https://github.com/salsa-rs/salsa/blob/0f9971ad94d5d137f1192fde2b02ccf1d2aca28c/src/lib.rs#L654-L672"><code>salsa::Cycle</code></a> as the panic value. The <a href="https://github.com/salsa-rs/salsa/blob/0f9971ad94d5d137f1192fde2b02ccf1d2aca28c/src/lib.rs#L654-L672"><code>salsa::Cycle</code></a> structure describes the cycle, which can be useful for diagnosing what went wrong.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="recovering-via-fallback"><a class="header" href="#recovering-via-fallback">Recovering via fallback</a></h1>
|
|
<p>Panicking when a cycle occurs is ok for situations where you believe a cycle is impossible. But sometimes cycles can result from illegal user input and cannot be statically prevented. In these cases, you might prefer to gracefully recover from a cycle rather than panicking the entire query. Salsa supports that with the idea of <em>cycle recovery</em>.</p>
|
|
<p>To use cycle recovery, you annotate potential participants in the cycle with a <code>#[salsa::cycle(my_recover_fn)]</code> attribute. When a cycle occurs, if any participant P has recovery information, then no panic occurs. Instead, the execution of P is aborted and P will execute the recovery function to generate its result. Participants in the cycle that do not have recovery information continue executing as normal, using this recovery result.</p>
|
|
<p>The recovery function has a similar signature to a query function. It is given a reference to your database along with a <code>salsa::Cycle</code> describing the cycle that occurred; it returns the result of the query. Example:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>fn my_recover_fn(
|
|
db: &dyn MyDatabase,
|
|
cycle: &salsa::Cycle,
|
|
) -> MyResultValue
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>The <code>db</code> and <code>cycle</code> arguments can be used to prepare a useful error message for your users.</p>
|
|
<p><strong>Important:</strong> Although the recovery function is given a <code>db</code> handle, you should be careful to avoid creating a cycle from within recovery or invoking queries that may be participating in the current cycle. Attempting to do so can result in inconsistent results.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="how-salsa-works"><a class="header" href="#how-salsa-works">How Salsa works</a></h1>
|
|
<h2 id="video-available"><a class="header" href="#video-available">Video available</a></h2>
|
|
<p>To get the most complete introduction to Salsa's inner workings, check
|
|
out <a href="https://youtu.be/_muY4HjSqVw">the "How Salsa Works" video</a>. If
|
|
you'd like a deeper dive, <a href="https://www.youtube.com/watch?v=i_IhACacPRY">the "Salsa in more depth"
|
|
video</a> digs into the
|
|
details of the incremental algorithm.</p>
|
|
<blockquote>
|
|
<p>If you're in China, watch videos on <a href="https://www.bilibili.com/video/BV1Df4y1A7t3/">"How Salsa Works"</a>, <a href="https://www.bilibili.com/video/BV1AM4y1G7E4/">"Salsa In More Depth"</a>.</p>
|
|
</blockquote>
|
|
<h2 id="key-idea"><a class="header" href="#key-idea">Key idea</a></h2>
|
|
<p>The key idea of <code>salsa</code> is that you define your program as a set of
|
|
<strong>queries</strong>. Every query is used like a function <code>K -> V</code> that maps from
|
|
some key of type <code>K</code> to a value of type <code>V</code>. Queries come in two basic
|
|
varieties:</p>
|
|
<ul>
|
|
<li><strong>Inputs</strong>: the base inputs to your system. You can change these
|
|
whenever you like.</li>
|
|
<li><strong>Functions</strong>: pure functions (no side effects) that transform your
|
|
inputs into other values. The results of queries are memoized to
|
|
avoid recomputing them a lot. When you make changes to the inputs,
|
|
we'll figure out (fairly intelligently) when we can re-use these
|
|
memoized values and when we have to recompute them.</li>
|
|
</ul>
|
|
<h2 id="how-to-use-salsa-in-three-easy-steps"><a class="header" href="#how-to-use-salsa-in-three-easy-steps">How to use Salsa in three easy steps</a></h2>
|
|
<p>Using Salsa is as easy as 1, 2, 3...</p>
|
|
<ol>
|
|
<li>Define one or more <strong>query groups</strong> that contain the inputs
|
|
and queries you will need. We'll start with one such group, but
|
|
later on you can use more than one to break up your system into
|
|
components (or spread your code across crates).</li>
|
|
<li>Define the <strong>query functions</strong> where appropriate.</li>
|
|
<li>Define the <strong>database</strong>, which contains the storage for all
|
|
the inputs/queries you will be using. The database struct will contain
|
|
the storage for all of the inputs/queries and may also contain
|
|
anything else that your code needs (e.g., configuration data).</li>
|
|
</ol>
|
|
<p>To see an example of this in action, check out <a href="https://github.com/salsa-rs/salsa/blob/master/examples/hello_world/main.rs">the <code>hello_world</code>
|
|
example</a>, which has a number of comments explaining how
|
|
things work.</p>
|
|
<h2 id="digging-into-the-plumbing"><a class="header" href="#digging-into-the-plumbing">Digging into the plumbing</a></h2>
|
|
<p>Check out the <a href="plumbing.html">plumbing</a> chapter to see a deeper explanation of the
|
|
code that Salsa generates and how it connects to the Salsa library.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="videos"><a class="header" href="#videos">Videos</a></h1>
|
|
<p>There is currently one video available on the newest version of Salsa:</p>
|
|
<ul>
|
|
<li><a href="https://www.youtube.com/watch?v=vrnNvAAoQFk">Salsa Architecture Walkthrough</a>,
|
|
which covers many aspects of the redesigned architecture.</li>
|
|
</ul>
|
|
<p>There are also two videos on the older version of Salsa, but they are rather
|
|
outdated:</p>
|
|
<ul>
|
|
<li><a href="https://youtu.be/_muY4HjSqVw">How Salsa Works</a>, which gives a high-level
|
|
introduction to the key concepts involved and shows how to use Salsa;</li>
|
|
<li><a href="https://www.youtube.com/watch?v=i_IhACacPRY">Salsa In More Depth</a>, which digs
|
|
into the incremental algorithm and explains -- at a high-level -- how Salsa is
|
|
implemented.</li>
|
|
</ul>
|
|
<blockquote>
|
|
<p>If you're in China, watch videos on
|
|
<a href="https://www.bilibili.com/video/BV1Df4y1A7t3/">How Salsa Works</a>,
|
|
<a href="https://www.bilibili.com/video/BV1AM4y1G7E4/">Salsa In More Depth</a>.</p>
|
|
</blockquote>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="plumbing"><a class="header" href="#plumbing">Plumbing</a></h1>
|
|
<blockquote>
|
|
<p>⚠️ <strong>IN-PROGRESS VERSION OF SALSA.</strong> ⚠️</p>
|
|
<p>This page describes the unreleased "Salsa 2022" version, which is a major departure from older versions of salsa. The code here works but is only available on github and from the <code>salsa-2022</code> crate.</p>
|
|
<p>If you are looking for the older version of salsa, simply visit <a href="https://salsa-rs.netlify.app/salsa">this link</a></p>
|
|
</blockquote>
|
|
<p>This chapter documents the code that salsa generates and its "inner workings".
|
|
We refer to this as the "plumbing".</p>
|
|
<h2 id="overview"><a class="header" href="#overview">Overview</a></h2>
|
|
<p>The plumbing section is broken up into chapters:</p>
|
|
<ul>
|
|
<li>The <a href="./plumbing/jars_and_ingredients.html">jars and ingredients</a> chapter covers how each salsa item (like a tracked function) specifies what data it needs at runtime, and how links between items work.</li>
|
|
<li>The <a href="./plumbing/database_and_runtime.html">database and runtime</a> chapter covers the data structures that are used at runtime to coordinate workers, trigger cancellation, track which functions are active and what dependencies they have accrued, and so forth.</li>
|
|
<li>The <a href="./plumbing/query_ops.html">query operations</a> chapter describes how the major operations on function ingredients work. This text was written for an older version of salsa but the logic is the same:
|
|
<ul>
|
|
<li>The <a href="./plumbing/maybe_changed_after.html">maybe changed after</a> operation determines when a memoized value for a tracked function is out of date.</li>
|
|
<li>The <a href="./plumbing/fetch.html">fetch</a> operation computes the most recent value.</li>
|
|
<li>The <a href="./plumbing/derived_flowchart.html">derived queries flowchart</a> depicts the logic in flowchart form.</li>
|
|
<li>The <a href="./plumbing/cycles.html">cycle handling</a> chapter describes what happens when cycles occur.</li>
|
|
</ul>
|
|
</li>
|
|
<li>The <a href="./plumbing/terminology.html">terminology</a> section describes various words that appear throughout.</li>
|
|
</ul>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="jars-and-ingredients"><a class="header" href="#jars-and-ingredients">Jars and ingredients</a></h1>
|
|
<blockquote>
|
|
<p>⚠️ <strong>IN-PROGRESS VERSION OF SALSA.</strong> ⚠️</p>
|
|
<p>This page describes the unreleased "Salsa 2022" version, which is a major departure from older versions of salsa. The code here works but is only available on github and from the <code>salsa-2022</code> crate.</p>
|
|
<p>If you are looking for the older version of salsa, simply visit <a href="https://salsa-rs.netlify.app/salsa">this link</a></p>
|
|
</blockquote>
|
|
<p>This page covers how data is organized in Salsa and how links between Salsa items (e.g., dependency tracking) work.</p>
|
|
<h2 id="salsa-items-and-ingredients"><a class="header" href="#salsa-items-and-ingredients">Salsa items and ingredients</a></h2>
|
|
<p>A <strong>Salsa item</strong> is some item annotated with a Salsa annotation that can be included in a jar.
|
|
For example, a tracked function is a Salsa item:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::tracked]
|
|
fn foo(db: &dyn Db, input: MyInput) { }
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>...and so is a Salsa input...</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::input]
|
|
struct MyInput { }
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>...or a tracked struct:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>#[salsa::tracked]
|
|
struct MyStruct { }
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>Each Salsa item needs certain bits of data at runtime to operate.
|
|
These bits of data are called <strong>ingredients</strong>.
|
|
Most Salsa items generate a single ingredient, but sometimes they make more than one.
|
|
For example, a tracked function generates a <a href="https://github.com/salsa-rs/salsa/blob/becaade31e6ebc58cd0505fc1ee4b8df1f39f7de/components/salsa-2022/src/function.rs#L42"><code>FunctionIngredient</code></a>.
|
|
A tracked struct, however, generates several ingredients, one for the struct itself (a <a href="https://github.com/salsa-rs/salsa/blob/becaade31e6ebc58cd0505fc1ee4b8df1f39f7de/components/salsa-2022/src/tracked_struct.rs#L18"><code>TrackedStructIngredient</code></a>),
|
|
and one <a href="https://github.com/salsa-rs/salsa/blob/becaade31e6ebc58cd0505fc1ee4b8df1f39f7de/components/salsa-2022/src/function.rs#L42"><code>FunctionIngredient</code></a> for each value field.</p>
|
|
<h3 id="ingredients-define-the-core-logic-of-salsa"><a class="header" href="#ingredients-define-the-core-logic-of-salsa">Ingredients define the core logic of Salsa</a></h3>
|
|
<p>Most of the interesting Salsa code lives in these ingredients.
|
|
For example, when you create a new tracked struct, the method <a href="https://github.com/salsa-rs/salsa/blob/becaade31e6ebc58cd0505fc1ee4b8df1f39f7de/components/salsa-2022/src/tracked_struct.rs#L76"><code>TrackedStruct::new_struct</code></a> is invoked;
|
|
it is responsible for determining the tracked struct's id.
|
|
Similarly, when you call a tracked function, that is translated into a call to <a href="https://github.com/salsa-rs/salsa/blob/becaade31e6ebc58cd0505fc1ee4b8df1f39f7de/components/salsa-2022/src/function/fetch.rs#L15"><code>TrackedFunction::fetch</code></a>,
|
|
which decides whether there is a valid memoized value to return,
|
|
or whether the function must be executed.</p>
|
|
<h3 id="the-ingredient-trait"><a class="header" href="#the-ingredient-trait">The <code>Ingredient</code> trait</a></h3>
|
|
<p>Each ingredient implements the <a href="https://github.com/salsa-rs/salsa/blob/becaade31e6ebc58cd0505fc1ee4b8df1f39f7de/components/salsa-2022/src/ingredient.rs#L15"><code>Ingredient<DB></code></a> trait, which defines generic operations supported by any kind of ingredient.
|
|
For example, the method <code>maybe_changed_after</code> can be used to check whether some particular piece of data stored in the ingredient may have changed since a given revision.</p>
|
|
<p>We'll see below that each database <code>DB</code> is able to take an <code>IngredientIndex</code> and use that to get an <code>&dyn Ingredient<DB></code> for the corresponding ingredient.
|
|
This allows the database to perform generic operations on an indexed ingredient without knowing exactly what the type of that ingredient is.</p>
|
|
<h3 id="jars-are-a-collection-of-ingredients"><a class="header" href="#jars-are-a-collection-of-ingredients">Jars are a collection of ingredients</a></h3>
|
|
<p>When you declare a Salsa jar, you list out each of the Salsa items that are included in that jar:</p>
|
|
<pre><code class="language-rust ignore">#[salsa::jar]
|
|
struct Jar(
|
|
foo,
|
|
MyInput,
|
|
MyStruct
|
|
);
|
|
</code></pre>
|
|
<p>This expands to a struct like so:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>struct Jar(
|
|
<foo as IngredientsFor>::Ingredient,
|
|
<MyInput as IngredientsFor>::Ingredient,
|
|
<MyStruct as IngredientsFor>::Ingredient,
|
|
)
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>The <code>IngredientsFor</code> trait is used to define the ingredients needed by some Salsa item, such as the tracked function <code>foo</code> or the input <code>MyInput</code>.
|
|
Each Salsa item defines a type <code>I</code> so that <code><I as IngredientsFor>::Ingredient</code> gives the ingredients needed by <code>I</code>.</p>
|
|
<h3 id="a-database-is-a-tuple-of-jars"><a class="header" href="#a-database-is-a-tuple-of-jars">A database is a tuple of jars</a></h3>
|
|
<p>Salsa's database storage ultimately boils down to a tuple of jar structs
|
|
where each jar struct (as we just saw) itself contains the ingredients
|
|
for the Salsa items within that jar.
|
|
The database can thus be thought of as a list of ingredients,
|
|
although that list is organized into a 2-level hierarchy.</p>
|
|
<p>The reason for this 2-level hierarchy is that it permits separate compilation and privacy.
|
|
The crate that lists the jars doesn't have to know the contents of the jar to embed the jar struct in the database.
|
|
And some of the types that appear in the jar may be private to another struct.</p>
|
|
<h3 id="the-hasjars-trait-and-the-jars-type"><a class="header" href="#the-hasjars-trait-and-the-jars-type">The <code>HasJars</code> trait and the <code>Jars</code> type</a></h3>
|
|
<p>Each Salsa database implements the <code>HasJars</code> trait,
|
|
generated by the <code>salsa::db</code> procedural macro.
|
|
The <code>HasJars</code> trait, among other things, defines a <code>Jars</code> associated type that maps to a tuple of the jars in the database.</p>
|
|
<p>For example, given a database like this...</p>
|
|
<pre><code class="language-rust ignore">#[salsa::db(Jar1, ..., JarN)]
|
|
struct MyDatabase {
|
|
storage: salsa::Storage<Self>
|
|
}
|
|
</code></pre>
|
|
<p>...the <code>salsa::db</code> macro would generate a <code>HasJars</code> impl that (among other things) contains <code>type Jars = (Jar1, ..., JarN)</code>:</p>
|
|
<pre><code class="language-rust ignore"> impl salsa::storage::HasJars for #db {
|
|
type Jars = (#(#jar_paths,)*);
|
|
</code></pre>
|
|
<p>In turn, the <code>salsa::Storage<DB></code> type ultimately contains a struct <code>Shared</code> that embeds <code>DB::Jars</code>, thus embedding all the data for each jar.</p>
|
|
<h3 id="ingredient-indices"><a class="header" href="#ingredient-indices">Ingredient indices</a></h3>
|
|
<p>During initialization, each ingredient in the database is assigned a unique index called the <a href="https://github.com/salsa-rs/salsa/blob/becaade31e6ebc58cd0505fc1ee4b8df1f39f7de/components/salsa-2022/src/routes.rs#L5-L9"><code>IngredientIndex</code></a>.
|
|
This is a 32-bit number that identifies a particular ingredient from a particular jar.</p>
|
|
<h3 id="routes"><a class="header" href="#routes">Routes</a></h3>
|
|
<p>In addition to an index, each ingredient in the database also has a corresponding <em>route</em>.
|
|
A route is a closure that, given a reference to the <code>DB::Jars</code> tuple,
|
|
returns a <code>&dyn Ingredient<DB></code> reference.
|
|
The route table allows us to go from the <code>IngredientIndex</code> for a particular ingredient
|
|
to its <code>&dyn Ingredient<DB></code> trait object.
|
|
The route table is created while the database is being initialized,
|
|
as described shortly.</p>
|
|
<h3 id="database-keys-and-dependency-keys"><a class="header" href="#database-keys-and-dependency-keys">Database keys and dependency keys</a></h3>
|
|
<p>A <code>DatabaseKeyIndex</code> identifies a specific value stored in some specific ingredient.
|
|
It combines an <a href="https://github.com/salsa-rs/salsa/blob/becaade31e6ebc58cd0505fc1ee4b8df1f39f7de/components/salsa-2022/src/routes.rs#L5-L9"><code>IngredientIndex</code></a> with a <code>key_index</code>, which is a <code>salsa::Id</code>:</p>
|
|
<pre><code class="language-rust ignore">/// An "active" database key index represents a database key index
|
|
/// that is actively executing. In that case, the `key_index` cannot be
|
|
/// None.
|
|
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
|
|
pub struct DatabaseKeyIndex {
|
|
pub(crate) ingredient_index: IngredientIndex,
|
|
pub(crate) key_index: Id,
|
|
}
|
|
</code></pre>
|
|
<p>A <code>DependencyIndex</code> is similar, but the <code>key_index</code> is optional.
|
|
This is used when we sometimes wish to refer to the ingredient as a whole, and not any specific value within the ingredient.</p>
|
|
<p>These kinds of indices are used to store connections between ingredients.
|
|
For example, each memoized value has to track its inputs.
|
|
Those inputs are stored as dependency indices.
|
|
We can then do things like ask, "did this input change since revision R?" by</p>
|
|
<ul>
|
|
<li>using the ingredient index to find the route and get a <code>&dyn Ingredient<DB></code></li>
|
|
<li>and then invoking the <code>maybe_changed_after</code> method on that trait object.</li>
|
|
</ul>
|
|
<h3 id="hasjarsdyn"><a class="header" href="#hasjarsdyn"><code>HasJarsDyn</code></a></h3>
|
|
<p>There is one catch in the above setup.
|
|
The user's code always interacts with a <code>dyn crate::Db</code> value, where <code>crate::Db</code> is the trait defined by the jar; the <code>crate::Db</code> trait extends <code>salsa::HasJar</code> which in turn extends <code>salsa::Database</code>.
|
|
Ideally, we would have <code>salsa::Database</code> extend <code>salsa::HasJars</code>, which is the main trait that gives access to the jars data.
|
|
But we don't want to do that because <code>HasJars</code> defines an associated type <code>Jars</code>, and that would mean that every reference to <code>dyn crate::Db</code> would have to specify the jars type using something like <code>dyn crate::Db<Jars = J></code>.
|
|
This would be unergonomic, but what's worse, it would actually be impossible: the final Jars type combines the jars from multiple crates, and so it is not known to any individual jar crate.
|
|
To work around this, <code>salsa::Database</code> in fact extends <em>another</em> trait, <code>HasJarsDyn</code>, that doesn't reveal the <code>Jars</code> or ingredient types directly, but just has various methods that can be performed on an ingredient, given its <code>IngredientIndex</code>.
|
|
Traits like <code>Ingredient<DB></code> require knowing the full <code>DB</code> type.
|
|
If we had one function ingredient directly invoke a method on <code>Ingredient<DB></code>, that would imply that it has to be fully generic and only instantiated at the final crate, when the full database type is available.</p>
|
|
<p>We solve this via the <code>HasJarsDyn</code> trait. The <code>HasJarsDyn</code> trait exports a method that combines the "find ingredient, invoke method" steps into one method:</p>
|
|
<pre><code class="language-rust ignore">/// Dyn friendly subset of HasJars
|
|
pub trait HasJarsDyn {
|
|
fn runtime(&self) -> &Runtime;
|
|
|
|
fn runtime_mut(&mut self) -> &mut Runtime;
|
|
|
|
fn maybe_changed_after(&self, input: DependencyIndex, revision: Revision) -> bool;
|
|
|
|
fn cycle_recovery_strategy(&self, input: IngredientIndex) -> CycleRecoveryStrategy;
|
|
|
|
fn origin(&self, input: DatabaseKeyIndex) -> Option<QueryOrigin>;
|
|
|
|
fn mark_validated_output(&self, executor: DatabaseKeyIndex, output: DependencyIndex);
|
|
|
|
/// Invoked when `executor` used to output `stale_output` but no longer does.
|
|
/// This method routes that into a call to the [`remove_stale_output`](`crate::ingredient::Ingredient::remove_stale_output`)
|
|
/// method on the ingredient for `stale_output`.
|
|
fn remove_stale_output(&self, executor: DatabaseKeyIndex, stale_output: DependencyIndex);
|
|
|
|
/// Informs `ingredient` that the salsa struct with id `id` has been deleted.
|
|
/// This means that `id` will not be used in this revision and hence
|
|
/// any memoized values keyed by that struct can be discarded.
|
|
///
|
|
/// In order to receive this callback, `ingredient` must have registered itself
|
|
/// as a dependent function using
|
|
/// [`SalsaStructInDb::register_dependent_fn`](`crate::salsa_struct::SalsaStructInDb::register_dependent_fn`).
|
|
fn salsa_struct_deleted(&self, ingredient: IngredientIndex, id: Id);
|
|
|
|
fn fmt_index(&self, index: DependencyIndex, fmt: &mut fmt::Formatter<'_>) -> fmt::Result;
|
|
}
|
|
</code></pre>
|
|
<p>So, technically, to check if an input has changed, an ingredient:</p>
|
|
<ul>
|
|
<li>Invokes <code>HasJarsDyn::maybe_changed_after</code> on the <code>dyn Database</code></li>
|
|
<li>The impl for this method (generated by <code>#[salsa::db]</code>):
|
|
<ul>
|
|
<li>gets the route for the ingredient from the ingredient index</li>
|
|
<li>uses the route to get a <code>&dyn Ingredient</code></li>
|
|
<li>invokes <code>maybe_changed_after</code> on that ingredient</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
<h3 id="initializing-the-database"><a class="header" href="#initializing-the-database">Initializing the database</a></h3>
|
|
<p>The last thing to discuss is how the database is initialized.
|
|
The <code>Default</code> implementation for <code>Storage<DB></code> does the work:</p>
|
|
<pre><code class="language-rust ignore">impl<DB> Default for Storage<DB>
|
|
where
|
|
DB: HasJars,
|
|
{
|
|
fn default() -> Self {
|
|
let mut routes = Routes::new();
|
|
let jars = DB::create_jars(&mut routes);
|
|
Self {
|
|
shared: Shared {
|
|
jars: Some(Arc::from(jars)),
|
|
cvar: Arc::new(Default::default()),
|
|
noti_lock: Arc::new(parking_lot::Mutex::new(())),
|
|
},
|
|
routes: Arc::new(routes),
|
|
runtime: Runtime::default(),
|
|
}
|
|
}
|
|
}
|
|
</code></pre>
|
|
<p>First, it creates an empty <code>Routes</code> instance.
|
|
Then it invokes the <code>DB::create_jars</code> method.
|
|
The implementation of this method is defined by the <code>#[salsa::db]</code> macro; it invokes <code>salsa::plumbing::create_jars_inplace</code> to allocate memory for the jars, and then invokes the <code>Jar::init_jar</code> method on each of the jars to initialize them:</p>
|
|
<pre><code class="language-rust ignore"> fn create_jars(routes: &mut salsa::routes::Routes<Self>) -> Box<Self::Jars> {
|
|
unsafe {
|
|
salsa::plumbing::create_jars_inplace::<#db>(|jars| {
|
|
#(
|
|
 unsafe {
|
|
let place = std::ptr::addr_of_mut!((*jars).#jar_field_names);
|
|
<#jar_paths as salsa::jar::Jar>::init_jar(place, routes);
|
|
}
|
|
)*
|
|
})
|
|
}
|
|
}
|
|
</code></pre>
|
|
<p>This implementation for <code>init_jar</code> is generated by the <code>#[salsa::jar]</code> macro, and simply walks over the representative type for each salsa item and asks <em>it</em> to create its ingredients:</p>
|
|
<pre><code class="language-rust ignore"> quote! {
|
|
unsafe impl<'salsa_db> salsa::jar::Jar<'salsa_db> for #jar_struct {
|
|
type DynDb = dyn #jar_trait + 'salsa_db;
|
|
|
|
unsafe fn init_jar<DB>(place: *mut Self, routes: &mut salsa::routes::Routes<DB>)
|
|
where
|
|
DB: salsa::storage::JarFromJars<Self> + salsa::storage::DbWithJar<Self>,
|
|
{
|
|
<span class="boring"> (
|
|
</span> unsafe {
|
|
std::ptr::addr_of_mut!((*place).#field_var_names)
|
|
.write(<#field_tys as salsa::storage::IngredientsFor>::create_ingredients(routes));
|
|
}
|
|
)*
|
|
}
|
|
}
|
|
}
|
|
</code></pre>
|
|
<p>The code to create the ingredients for any particular item is generated by their associated macros (e.g., <code>#[salsa::tracked]</code>, <code>#[salsa::input]</code>), but it always follows a particular structure.
|
|
To create an ingredient, we first invoke <code>Routes::push</code>, which creates the routes to that ingredient and assigns it an <code>IngredientIndex</code>.
|
|
We can then invoke a function such as <code>FunctionIngredient::new</code> to create the structure.
|
|
The <em>routes</em> to an ingredient are defined as closures that, given the <code>DB::Jars</code>, can find the data for a particular ingredient.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="database-and-runtime"><a class="header" href="#database-and-runtime">Database and runtime</a></h1>
|
|
<p>A salsa database struct is declared by the user with the <code>#[salsa::db]</code> annotation.
|
|
It contains all the data that the program needs to execute:</p>
|
|
<pre><code class="language-rust ignore">#[salsa::db(jar0...jarn)]
|
|
struct MyDatabase {
|
|
storage: Storage<Self>,
|
|
maybe_other_fields: u32,
|
|
}
|
|
</code></pre>
|
|
<p>This data is divided into two categories:</p>
|
|
<ul>
|
|
<li>Salsa-governed storage, contained in the <code>Storage<Self></code> field. This data is mandatory.</li>
|
|
<li>Other fields (like <code>maybe_other_fields</code>) defined by the user. This can be anything. This allows you to give access to special resources or whatever.</li>
|
|
</ul>
|
|
<h2 id="parallel-handles"><a class="header" href="#parallel-handles">Parallel handles</a></h2>
|
|
<p>When used across parallel threads, the database type defined by the user must support a "snapshot" operation.
|
|
This snapshot should create a clone of the database that can be used by the parallel threads.
|
|
The <code>Storage</code> struct itself supports <code>snapshot</code>.
|
|
The <code>snapshot</code> method returns a <code>Snapshot<DB></code> type, which prevents these clones from being accessed via an <code>&mut</code> reference.</p>
|
|
<h2 id="the-storage-struct"><a class="header" href="#the-storage-struct">The Storage struct</a></h2>
|
|
<p>The salsa <code>Storage</code> struct contains all the data that salsa itself will use and work with.
|
|
There are three key bits of data:</p>
|
|
<ul>
|
|
<li>The <code>Shared</code> struct, which contains the data stored across all snapshots. This is primarily the ingredients described in the <a href="plumbing/./jars_and_ingredients.html">jars and ingredients chapter</a>, but it also contains some synchronization information (a cond var). This is used for cancellation, as described below.
|
|
<ul>
|
|
<li>The data in the <code>Shared</code> struct is only shared across threads when other threads are active. Some operations, like mutating an input, require an <code>&mut</code> handle to the <code>Shared</code> struct. This is obtained by using the <code>Arc::get_mut</code> method; obviously this is only possible when all snapshots and threads have ceased executing, since there must be a single handle to the <code>Arc</code>.</li>
|
|
</ul>
|
|
</li>
|
|
<li>The <code>Routes</code> struct, which contains the information to find any particular ingredient -- this is also shared across all handles, and its construction is also described in the <a href="plumbing/./jars_and_ingredients.html">jars and ingredients chapter</a>. The routes are separated out from the <code>Shared</code> struct because they are truly immutable at all times, and we want to be able to hold a handle to them while getting <code>&mut</code> access to the <code>Shared</code> struct.</li>
|
|
<li>The <code>Runtime</code> struct, which is specific to a particular database instance. It contains the data for a single active thread, along with some links to shared data of its own.</li>
|
|
</ul>
|
|
<h2 id="incrementing-the-revision-counter-and-getting-mutable-access-to-the-jars"><a class="header" href="#incrementing-the-revision-counter-and-getting-mutable-access-to-the-jars">Incrementing the revision counter and getting mutable access to the jars</a></h2>
|
|
<p>Salsa's general model is that there is a single "master" copy of the database and, potentially, multiple snapshots.
|
|
The snapshots are not directly owned, they are instead enclosed in a <code>Snapshot<DB></code> type that permits only <code>&</code>-deref,
|
|
and so the only database that can be accessed with an <code>&mut</code>-ref is the master database.
|
|
Each of the snapshots, however, holds another handle on the <code>Arc</code> in <code>Storage</code> that stores the ingredients.</p>
|
|
<p>Whenever the user attempts to do an <code>&mut</code>-operation, such as modifying an input field, that needs to
|
|
first cancel any parallel snapshots and wait for those parallel threads to finish.
|
|
Once the snapshots have completed, we can use <code>Arc::get_mut</code> to get an <code>&mut</code> reference to the ingredient data.
|
|
This allows us to get <code>&mut</code> access without any unsafe code and
|
|
guarantees that we have successfully managed to cancel the other worker threads
|
|
(or gotten ourselves into a deadlock).</p>
|
|
<p>The code to acquire <code>&mut</code> access to the database is the <code>jars_mut</code> method:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span> /// Gets mutable access to the jars. This will trigger a new revision
|
|
/// and it will also cancel any ongoing work in the current revision.
|
|
/// Any actual writes that occur to data in a jar should use
|
|
/// [`Runtime::report_tracked_write`].
|
|
pub fn jars_mut(&mut self) -> (&mut DB::Jars, &mut Runtime) {
|
|
// Wait for all snapshots to be dropped.
|
|
self.cancel_other_workers();
|
|
|
|
// Increment revision counter.
|
|
self.runtime.new_revision();
|
|
|
|
// Acquire `&mut` access to `self.shared` -- this is only possible because
|
|
// the snapshots have all been dropped, so we hold the only handle to the `Arc`.
|
|
let jars = Arc::get_mut(self.shared.jars.as_mut().unwrap()).unwrap();
|
|
|
|
// Inform other ingredients that a new revision has begun.
|
|
// This gives them a chance to free resources that were being held until the next revision.
|
|
let routes = self.routes.clone();
|
|
for route in routes.reset_routes() {
|
|
route(jars).reset_for_new_revision();
|
|
}
|
|
|
|
// Return mut ref to jars + runtime.
|
|
(jars, &mut self.runtime)
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>The key initial point is that it invokes <code>cancel_other_workers</code> before proceeding:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span> /// Sets cancellation flag and blocks until all other workers with access
|
|
/// to this storage have completed.
|
|
///
|
|
/// This could deadlock if there is a single worker with two handles to the
|
|
/// same database!
|
|
fn cancel_other_workers(&mut self) {
|
|
loop {
|
|
self.runtime.set_cancellation_flag();
|
|
|
|
// Acquire lock before we check if we have unique access to the jars.
|
|
// If we do not yet have unique access, we will go to sleep and wait for
|
|
// the snapshots to be dropped, which will signal the cond var associated
|
|
// with this lock.
|
|
//
|
|
// NB: We have to acquire the lock first to ensure that we can check for
|
|
// unique access and go to sleep waiting on the condvar atomically,
|
|
// as described in PR #474.
|
|
let mut guard = self.shared.noti_lock.lock();
|
|
// If we have unique access to the jars, we are done.
|
|
if Arc::get_mut(self.shared.jars.as_mut().unwrap()).is_some() {
|
|
return;
|
|
}
|
|
|
|
// Otherwise, wait until some other storage entities have dropped.
|
|
//
|
|
// The cvar `self.shared.cvar` is notified by the `Drop` impl.
|
|
self.shared.cvar.wait(&mut guard);
|
|
}
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<h2 id="the-salsa-runtime"><a class="header" href="#the-salsa-runtime">The Salsa runtime</a></h2>
|
|
<p>The salsa runtime offers helper methods that are accessed by the ingredients.
|
|
It tracks, for example, the active query stack, and contains methods for adding dependencies between queries (e.g., <code>report_tracked_read</code>) or <a href="plumbing/./cycles.html">resolving cycles</a>.
|
|
It also tracks the current revision and information about when values with low or high durability last changed.</p>
|
|
<p>Basically, the ingredient structures store the "data at rest" -- like memoized values -- and things that are "per ingredient".</p>
|
|
<p>The runtime stores the "active, in-progress" data, such as which queries are on the stack, and/or the dependencies accessed by the currently active query.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="the-db-lifetime-1"><a class="header" href="#the-db-lifetime-1">The <code>'db</code> lifetime</a></h1>
|
|
<p><a href="plumbing/./tracked_structs.html">Tracked</a> and interned structs are both declared with a <code>'db</code> lifetime.
|
|
This lifetime is linked to the <code>db: &DB</code> reference used to create them.
|
|
The <code>'db</code> lifetime has several implications:</p>
|
|
<ul>
|
|
<li>It ensures that the user does not create a new salsa revision while a tracked/interned struct is in active use. Creating a new salsa revision requires modifying an input which requires an <code>&mut DB</code> reference, therefore it cannot occur during <code>'db</code>.
|
|
<ul>
|
|
<li>The struct may not even exist in the new salsa revision so allowing access would be confusing.</li>
|
|
</ul>
|
|
</li>
|
|
<li>It permits the structs to be implemented using a pointer rather than a <code>salsa::Id</code>, which in turn means more efficient field access (no read locks required).</li>
|
|
</ul>
|
|
<p>This section discusses the unsafe code used for pointer-based access along with the reasoning behind it. To be concrete, we'll focus on tracked structs -- interned structs are very similar.</p>
|
|
<h2 id="a-note-on-ub"><a class="header" href="#a-note-on-ub">A note on UB</a></h2>
|
|
<p>When we say in this page "users cannot do X", we mean that they cannot do X without invoking Undefined Behavior (e.g., by transmuting integers around).</p>
|
|
<h2 id="proof-obligations"><a class="header" href="#proof-obligations">Proof obligations</a></h2>
|
|
<p>Here is a typical sequence of operations for a tracked struct along with the user operations that will require us to prove unsafe assertions:</p>
|
|
<ul>
|
|
<li>A tracked function <code>f</code> executes in revision R0 and creates a tracked struct with <code>#[id]</code> fields <code>K</code> for the first time.
|
|
<ul>
|
|
<li><code>K</code> will be stored in the interning hashmap and mapped to a fresh identifier <code>id</code>.</li>
|
|
<li>The identifier <code>id</code> will be used as the key in the <code>StructMap</code> and point to a freshly created allocation <code>alloc : Alloc</code>.</li>
|
|
<li>A <code>ts: TS<'db></code> is created from the raw pointer <code>alloc</code> and returned to the user.</li>
|
|
</ul>
|
|
</li>
|
|
<li>The value of the field <code>field</code> is accessed on the tracked struct instance <code>ts</code> by invoking the method <code>ts.field(db)</code>
|
|
<ul>
|
|
<li><em>Unsafe:</em> This accesses the raw pointer to <code>alloc</code>.</li>
|
|
</ul>
|
|
</li>
|
|
<li>A new revision R1 begins.</li>
|
|
<li>The tracked function <code>f</code> does not re-execute in R1.</li>
|
|
<li>The value of the field <code>field</code> is accessed on the tracked struct instance <code>ts</code> by invoking the method <code>ts.field(db)</code>
|
|
<ul>
|
|
<li><em>Unsafe:</em> This accesses the raw pointer to <code>alloc</code>.</li>
|
|
</ul>
|
|
</li>
|
|
<li>A new revision R2 begins.</li>
|
|
<li>The tracked function <code>f</code> does re-execute in R2 and it again creates a tracked struct with key <code>K</code> and with (some) distinct field values.
|
|
<ul>
|
|
<li>The fields for <code>ts</code> are updated.</li>
|
|
</ul>
|
|
</li>
|
|
<li>The value of the field <code>field</code> is accessed on the tracked struct instance <code>ts</code> by invoking the method <code>ts.field(db)</code>
|
|
<ul>
|
|
<li><em>Unsafe:</em> This accesses the raw pointer to <code>alloc</code>.</li>
|
|
</ul>
|
|
</li>
|
|
<li>A new revision R3 begins.</li>
|
|
<li>When <code>f</code> executes this time it does NOT create a tracked struct with key <code>K</code>. The tracked struct <code>ts</code> is placed in the "to be deleted" list.</li>
|
|
<li>A new revision R4 begins:
|
|
<ul>
|
|
<li>The allocation <code>alloc</code> is freed.</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
<p>As noted in the list, the core "unsafe" operation that users can perform is to access the fields of a tracked struct.
|
|
Tracked structs store a raw pointer to the <code>alloc</code>, owned by the ingredient, that contains their field data.
|
|
Accessing the fields of a tracked struct returns a <code>&</code>-reference to fields stored in that <code>alloc</code>, which means we must ensure Rust's two core constraints are satisfied for the lifetime of that reference:</p>
|
|
<ul>
|
|
<li>The allocation <code>alloc</code> will not be freed (i.e., not be dropped)</li>
|
|
<li>The contents of the fields will not be mutated</li>
|
|
</ul>
|
|
<p>As the sequence above illustrates, we have to show that those two constraints are true in a variety of circumstances:</p>
|
|
<ul>
|
|
<li>newly created tracked structs</li>
|
|
<li>tracked structs that were created in prior revisions and re-validated in this revision</li>
|
|
<li>tracked structs whose fields were updated in this revision</li>
|
|
<li>tracked structs that were <em>not</em> created in this revision</li>
|
|
</ul>
|
|
<h2 id="definitions"><a class="header" href="#definitions">Definitions</a></h2>
|
|
<p>For every tracked struct <code>ts</code> we say that it has a <strong>defining query</strong> <code>f(..)</code>.
|
|
This refers to a particular invocation of the tracked function <code>f</code> with a particular set of arguments <code>..</code>.
|
|
This defining query is unique within a revision, meaning that <code>f</code> executes at most once with that same set of arguments.</p>
|
|
<p>We say that a query has <em>executed in a revision R</em> if its function body was executed. When this occurs, all tracked structs defined (created) by that query will be recorded along with the query's result.</p>
|
|
<p>We say that a query has been <em>validated in a revision R</em> if the salsa system determined that its inputs did not change and so skipped executing it. This also triggers the tracked structs defined by that query to be considered validated (in particular, we execute a function on them which updates some internal fields, as described below).</p>
|
|
<p>When we talk about <code>ts</code>, we mean </p>
|
|
<h2 id="theorem-at-the-start-of-a-new-revision-all-references-to-ts-are-within-salsas-database"><a class="header" href="#theorem-at-the-start-of-a-new-revision-all-references-to-ts-are-within-salsas-database">Theorem: At the start of a new revision, all references to <code>ts</code> are within salsa's database</a></h2>
|
|
<p>After <code>ts</code> is deleted, there may be other memoized values that still reference <code>ts</code>, but they must have a red input query.
|
|
<strong>Is this true even if there are user bugs like non-deterministic functions?</strong>
|
|
Argument: yes, because of non-forgery, those memoized values could not be accessed.
|
|
How did those memoized values obtain the <code>TS<'db></code> value in the first place?
|
|
It must have come from a function argument (XX: what about thread-local state).
|
|
Therefore, to access the value, they would have to provide those function arguments again.
|
|
But how did they get them?</p>
|
|
<p>Potential holes:</p>
|
|
<ul>
|
|
<li>Thread-local APIs that let you thread <code>'db</code> values down in an "invisible" way, so that you can return them without them showing up in your arguments -- e.g. a tracked function <code>() -> S<'db></code> that obtains its value from thread-local state.
|
|
<ul>
|
|
<li>We might be able to sanity check against this with enough effort by defining some traits that guarantee that every lifetime tagged thing in your result <em>could have</em> come from one of your arguments, but I don't think we can prove it altogether. We either have to tell users "don't do that" or we need to have some kind of dynamic check, e.g. with a kind of versioned pointer. Note that it does require unsafe code at present but only because of the limits of our existing APIs.</li>
|
|
<li>Alternatively we can do a better job cleaning up deleted stuff. This we could do.</li>
|
|
</ul>
|
|
</li>
|
|
<li>What about weird <code>Eq</code> implementations and the like? Do we have to make those unsafe?</li>
|
|
</ul>
|
|
<h2 id="theorem-to-access-a-tracked-struct-ts-in-revision-r-the-defining-query-f-must-have-either-executed-or-been-validated-in-the-revision-r"><a class="header" href="#theorem-to-access-a-tracked-struct-ts-in-revision-r-the-defining-query-f-must-have-either-executed-or-been-validated-in-the-revision-r">Theorem: To access a tracked struct <code>ts</code> in revision R, the defining query <code>f(..)</code> must have either <em>executed</em> or been <em>validated</em> in the revision R.</a></h2>
|
|
<p>This is the core bit of reasoning underlying most of what follows.
|
|
The idea is that users cannot "forge" a tracked struct instance <code>ts</code>.
|
|
They must have gotten it through salsa's internal mechanisms.
|
|
This is important because salsa will provide <code>&</code>-references to fields within <code>ts</code> that remain valid during a revision.
|
|
But at the start of a new revision salsa may opt to modify those fields or even free the allocation.
|
|
This is safe because users cannot have references to <code>ts</code> at the start of a new revision.</p>
|
|
<h3 id="lemma"><a class="header" href="#lemma">Lemma</a></h3>
|
|
<p>We will prove it by proceeding through the revisions in the life cycle above (this can be considered a proof by induction).</p>
|
|
<h3 id="before-ts-is-first-created-in-r0"><a class="header" href="#before-ts-is-first-created-in-r0">Before <code>ts</code> is first created in R0</a></h3>
|
|
<p>Users must have originally obtained <code>ts: TS<'db></code> by invoking <code>TS::new(&db, ...)</code>.
|
|
This is because creating an instance of <code>TS</code> requires providing a <code>NonNull<salsa::tracked_struct::ValueStruct></code> pointer
|
|
to an unsafe function whose contract requires the pointer's validity.</p>
|
|
<p><strong>FIXME:</strong> This is not strictly true, I think the constructor is just a private tuple ctor, we should fix that.</p>
|
|
<h3 id="during-r0"><a class="header" href="#during-r0">During R0</a></h3>
|
|
<h3 id=""><a class="header" href="#"></a></h3>
|
|
<h3 id="inductive-case-consider-some-revision-r"><a class="header" href="#inductive-case-consider-some-revision-r">Inductive case: Consider some revision R</a></h3>
|
|
<p>We start by showing some circumstances that cannot occur:</p>
|
|
<ul>
|
|
<li>accessing the field of a tracked struct <code>ts</code> that was never created</li>
|
|
<li>accessing the field of a tracked struct <code>ts</code> after it is freed</li>
|
|
</ul>
|
|
<h3 id="lemma-no-forgery-users-cannot-forge-a-tracked-struct"><a class="header" href="#lemma-no-forgery-users-cannot-forge-a-tracked-struct">Lemma (no forgery): Users cannot forge a tracked struct</a></h3>
|
|
<p>The first observation is that users cannot "forge" an instance of a tracked struct <code>ts</code>.
|
|
They are required to produce a pointer to an <code>Alloc</code>.
|
|
This implies that every tracked struct <code>ts</code> originated in the ingredient.
|
|
The same is not true for input structs, for example, because they are created from integer identifiers and users could just make those up.</p>
|
|
<h3 id="lemma-within-one-rev-users-cannot-hold-a-tracked-struct-ts-across-revisions"><a class="header" href="#lemma-within-one-rev-users-cannot-hold-a-tracked-struct-ts-across-revisions">Lemma (within one rev): Users cannot hold a tracked struct <code>ts</code> across revisions</a></h3>
|
|
<p>The lifetime <code>'db</code> of the tracked struct <code>ts: TS<'db></code> is created from a <code>db: &'db dyn Db</code> handle.
|
|
Beginning a new revision requires an <code>&mut</code> reference.
|
|
Therefore, so long as users are actively using the value <code>ts</code>, the database cannot start a new revision.</p>
|
|
<p><em>Check:</em> What if users had two databases and invoked internal methods? Maybe they could then. We may have to add some assertions.</p>
|
|
<h3 id="theorem-in-order-to-get-a-tracked-struct-ts-in-revision-r0-the-tracked-fn-f-that-creates-it-must-either-execute-or-be-validated-first"><a class="header" href="#theorem-in-order-to-get-a-tracked-struct-ts-in-revision-r0-the-tracked-fn-f-that-creates-it-must-either-execute-or-be-validated-first">Theorem: In order to get a tracked struct <code>ts</code> in revision R0, the tracked fn <code>f</code> that creates it must either <em>execute</em> or <em>be validated</em> first</a></h3>
|
|
<p>The two points above combine to </p>
|
|
<h2 id="creating-new-values"><a class="header" href="#creating-new-values">Creating new values</a></h2>
|
|
<p>Each new value is stored in a <code>salsa::alloc::Alloc</code> created by <code>StructMap::insert</code>.
|
|
<code>Alloc</code> is a variant of the standard Rust <code>Box</code> that carries no uniqueness implications.
|
|
This means that every tracked struct has its own allocation.
|
|
This allocation is owned by the tracked struct ingredient
|
|
and thus stays live until the tracked struct ingredient is dropped
|
|
or until it is removed (see later for safety conditions around removal).</p>
|
|
<h2 id="the-user-type-uses-a-raw-pointer"><a class="header" href="#the-user-type-uses-a-raw-pointer">The user type uses a raw pointer</a></h2>
|
|
<p>The <code>#[salsa::tracked]</code> macro creates a user-exposed struct that looks roughly like this:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>// This struct is a wrapper around the actual fields that adds
|
|
// some revision metadata. You can think of it as a newtype'd
|
|
// version of the fields of the tracked struct.
|
|
use salsa::tracked_struct::ValueStruct;
|
|
|
|
struct MyTrackedStruct<'db> {
|
|
value: *const ValueStruct<..>,
|
|
phantom: PhantomData<&'db ValueStruct<...>>
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>Key observations:</p>
|
|
<ul>
|
|
<li>The actual pointer to the <code>ValueStruct</code> used at runtime is not a Rust reference but a raw pointer. This is needed for stacked borrows.</li>
|
|
<li>A <code>PhantomData</code> is used to keep the <code>'db</code> lifetime alive.</li>
|
|
</ul>
|
|
<p>The reason we use a raw pointer in the struct is that instances of this struct will outlive the <code>'db</code> lifetime. Consider this example:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>let mut db = MyDatabase::default();
|
|
let input = MyInput::new(&db, ...);
|
|
|
|
// Revision 1:
|
|
let result1 = tracked_fn(&db, input);
|
|
|
|
// Revision 2:
|
|
input.set_field(&mut db).to(...);
|
|
let result2 = tracked_fn(&db, input);
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>Tracked structs created by <code>tracked_fn</code> during Revision 1
|
|
may be reused during Revision 2, but the original <code>&db</code> reference
|
|
used to create them has expired.
|
|
If we stored a true Rust reference, that would be a violation of
|
|
the stacked borrows rules.</p>
|
|
<p>Instead, we store a raw pointer and,
|
|
whenever users invoke the accessor methods for particular fields,
|
|
we create a new reference to the contents:</p>
|
|
<pre><pre class="playground"><code class="language-rust">
|
|
<span class="boring">#![allow(unused)]
|
|
</span><span class="boring">fn main() {
|
|
</span>impl<'db> MyTrackedStruct<'db> {
|
|
fn field(self, db: &'db dyn DB) -> &'db FieldType {
|
|
...
|
|
}
|
|
}
|
|
<span class="boring">}
|
|
</span></code></pre></pre>
|
|
<p>This reference is linked to <code>db</code> and remains valid so long as the </p>
|
|
<h2 id="the-db-lifetime-at-rest"><a class="header" href="#the-db-lifetime-at-rest">The <code>'db</code> lifetime at rest</a></h2>
|
|
<h2 id="updating-tracked-struct-fields-across-revisions"><a class="header" href="#updating-tracked-struct-fields-across-revisions">Updating tracked struct fields across revisions</a></h2>
|
|
<h3 id="the-xx"><a class="header" href="#the-xx">The <code>XX</code></a></h3>
|
|
<h2 id="safety-lemmas"><a class="header" href="#safety-lemmas">Safety lemmas</a></h2>
|
|
<p>These lemmas are used to justify the safety of the system.</p>
|
|
<h3 id="using-mytrackeddb-within-some-revision-r-always-happens-after-a-call-to-mytrackednew"><a class="header" href="#using-mytrackeddb-within-some-revision-r-always-happens-after-a-call-to-mytrackednew">Using <code>MyTracked<'db></code> within some revision R always "happens after" a call to <code>MyTracked::new</code></a></h3>
|
|
<p>Whenever a tracked struct instance <code>TS<'db></code> is created for the first time in revision R1,
|
|
the result is a fresh allocation and hence there cannot be any
|
|
pre-existing aliases of that struct.</p>
|
|
<p><code>TS<'db></code> will at that time be stored into the salsa database.
|
|
In later revisions, we assert that </p>
|
|
<h3 id="db-t-references-are-never-stored-in-the-database"><a class="header" href="#db-t-references-are-never-stored-in-the-database"><code>&'db T</code> references are never stored in the database</a></h3>
|
|
<p>We maintain the invariant that, in any later revision R2, </p>
|
|
<p>However in some later revision R2, how </p>
|
|
<h2 id="ways-this-could-go-wrong-and-how-we-prevent-them"><a class="header" href="#ways-this-could-go-wrong-and-how-we-prevent-them">Ways this could go wrong and how we prevent them</a></h2>
|
|
<h3 id="-1"><a class="header" href="#-1"></a></h3>
|
|
<h3 id="storing-an-db-t-into-a-field"><a class="header" href="#storing-an-db-t-into-a-field">Storing an <code>&'db T</code> into a field</a></h3>
|
|
<h3 id="freeing-the-memory-while-a-tracked-struct-remains-live"><a class="header" href="#freeing-the-memory-while-a-tracked-struct-remains-live">Freeing the memory while a tracked struct remains live</a></h3>
|
|
<h3 id="aliases-of-a-tracked-struct"><a class="header" href="#aliases-of-a-tracked-struct">Aliases of a tracked struct</a></h3>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="tracked-structs-1"><a class="header" href="#tracked-structs-1">Tracked structs</a></h1>
|
|
<p>Tracked structs are stored in a special way to reduce their costs.</p>
|
|
<p>Tracked structs are created via a <code>new</code> operation.</p>
|
|
<h2 id="the-tracked-struct-and-tracked-field-ingredients"><a class="header" href="#the-tracked-struct-and-tracked-field-ingredients">The tracked struct and tracked field ingredients</a></h2>
|
|
<p>For a single tracked struct we create multiple ingredients.
|
|
The <strong>tracked struct ingredient</strong> is the ingredient created first.
|
|
It offers methods to create new instances of the struct and therefore
|
|
has unique access to the interner and hashtables used to create the struct id.
|
|
It also shares access to a hashtable that stores the <code>ValueStruct</code> that
|
|
contains the field data.</p>
|
|
<p>For each field, we create a <strong>tracked field ingredient</strong> that moderates access
|
|
to a particular field. All of these ingredients use that same shared hashtable
|
|
to access the <code>ValueStruct</code> instance for a given id. The <code>ValueStruct</code>
|
|
contains not only the field values but also the revisions when they last changed value.</p>
|
|
<h2 id="each-tracked-struct-has-a-globally-unique-id"><a class="header" href="#each-tracked-struct-has-a-globally-unique-id">Each tracked struct has a globally unique id</a></h2>
|
|
<p>The <code>new</code> operation begins by creating a <em>globally unique, 32-bit id</em> for the tracked struct. It is created by interning a combination of</p>
|
|
<ul>
|
|
<li>the currently executing query;</li>
|
|
<li>a u64 hash of the <code>#[id]</code> fields;</li>
|
|
<li>a <em>disambiguator</em> that makes this hash unique within the current query. i.e., when a query starts executing, it creates an empty map, and the first time a tracked struct with a given hash is created, it gets disambiguator 0. The next one will be given 1, etc.</li>
|
|
</ul>
|
|
<h2 id="each-tracked-struct-has-a-valuestruct-storing-its-data"><a class="header" href="#each-tracked-struct-has-a-valuestruct-storing-its-data">Each tracked struct has a <code>ValueStruct</code> storing its data</a></h2>
|
|
<p>The struct and field ingredients share access to a hashmap that maps
|
|
each field id to a value struct:</p>
|
|
<pre><code class="language-rust ignore">#[derive(Debug)]
|
|
pub struct ValueStruct<C>
|
|
where
|
|
C: Configuration,
|
|
{
|
|
/// Index of the struct ingredient.
|
|
struct_ingredient_index: IngredientIndex,
|
|
|
|
/// The id of this struct in the ingredient.
|
|
id: Id,
|
|
|
|
/// The key used to create the id.
|
|
key: KeyStruct,
|
|
|
|
/// The durability minimum durability of all inputs consumed
|
|
/// by the creator query prior to creating this tracked struct.
|
|
/// If any of those inputs changes, then the creator query may
|
|
/// create this struct with different values.
|
|
durability: Durability,
|
|
|
|
/// The revision when this entity was most recently created.
|
|
/// Typically the current revision.
|
|
/// Used to detect "leaks" outside of the salsa system -- i.e.,
|
|
/// access to tracked structs that have not (yet?) been created in the
|
|
/// current revision. This should be impossible within salsa queries
|
|
/// but it can happen through "leaks" like thread-local data or storing
|
|
/// values outside of the root salsa query.
|
|
created_at: Revision,
|
|
|
|
/// Fields of this tracked struct. They can change across revisions,
|
|
/// but they do not change within a particular revision.
|
|
fields: C::Fields<'static>,
|
|
|
|
/// The revision information for each field: when did this field last change.
|
|
/// When tracked structs are re-created, this revision may be updated to the
|
|
/// current revision if the value is different.
|
|
revisions: C::Revisions,
|
|
}
|
|
</code></pre>
|
|
<p>The value struct stores the values of the fields but also the revisions when
|
|
that field last changed. Each time the struct is recreated in a new revision,
|
|
the old and new values for its fields are compared and a new revision is created.</p>
|
|
<h2 id="the-macro-generates-the-tracked-struct-configuration"><a class="header" href="#the-macro-generates-the-tracked-struct-configuration">The macro generates the tracked struct <code>Configuration</code></a></h2>
|
|
<p>The "configuration" for a tracked struct defines not only the types of the fields,
|
|
but also various important operations such as extracting the hashable id fields
|
|
and updating the "revisions" to track when a field last changed:</p>
|
|
<pre><code class="language-rust ignore">/// Trait that defines the key properties of a tracked struct.
|
|
/// Implemented by the `#[salsa::tracked]` macro when applied
|
|
/// to a struct.
|
|
pub trait Configuration: Sized {
|
|
/// A (possibly empty) tuple of the fields for this struct.
|
|
type Fields<'db>;
|
|
|
|
/// An array of [`Revision`][] values, one for each of the value fields.
|
|
/// When a struct is re-created in a new revision, the corresponding
|
|
/// entries for each field are updated to the new revision if their
|
|
/// values have changed (or if the field is marked as `#[no_eq]`).
|
|
type Revisions;
|
|
|
|
type Struct<'db>: Copy;
|
|
|
|
/// Create an end-user struct from the underlying raw pointer.
|
|
///
|
|
/// This call is an "end-step" to the tracked struct lookup/creation
|
|
/// process in a given revision: it occurs only when the struct is newly
|
|
/// created or, if a struct is being reused, after we have updated its
|
|
/// fields (or confirmed it is green and no updates are required).
|
|
///
|
|
/// # Safety
|
|
///
|
|
/// Requires that `ptr` represents a "confirmed" value in this revision,
|
|
/// which means that it will remain valid and immutable for the remainder of this
|
|
/// revision, represented by the lifetime `'db`.
|
|
unsafe fn struct_from_raw<'db>(ptr: NonNull<ValueStruct<Self>>) -> Self::Struct<'db>;
|
|
|
|
/// Deref the struct to yield the underlying value struct.
|
|
/// Since we are still part of the `'db` lifetime in which the struct was created,
|
|
/// this deref is safe, and the value-struct fields are immutable and verified.
|
|
fn deref_struct(s: Self::Struct<'_>) -> &ValueStruct<Self>;
|
|
|
|
fn id_fields(fields: &Self::Fields<'_>) -> impl Hash;
|
|
|
|
/// Access the revision of a given value field.
|
|
/// `field_index` will be between 0 and the number of value fields.
|
|
fn revision(revisions: &Self::Revisions, field_index: u32) -> Revision;
|
|
|
|
/// Create a new value revision array where each element is set to `current_revision`.
|
|
fn new_revisions(current_revision: Revision) -> Self::Revisions;
|
|
|
|
/// Update the field data and, if the value has changed,
|
|
/// the appropriate entry in the `revisions` array.
|
|
///
|
|
/// # Safety
|
|
///
|
|
/// Requires the same conditions as the `maybe_update`
|
|
/// method on [the `Update` trait](`crate::update::Update`).
|
|
///
|
|
/// In short, requires that `old_fields` be a pointer into
|
|
/// storage from a previous revision.
|
|
/// It must meet its validity invariant.
|
|
/// Owned content must meet safety invariant.
|
|
/// `*mut` here is not strictly needed;
|
|
/// it is used to signal that the content
|
|
/// is not guaranteed to recursively meet
|
|
/// its safety invariant and
|
|
/// hence this must be dereferenced with caution.
|
|
///
|
|
/// Ensures that `old_fields` is fully updated and valid
|
|
/// after it returns and that `revisions` has been updated
|
|
/// for any field that changed.
|
|
unsafe fn update_fields<'db>(
|
|
current_revision: Revision,
|
|
revisions: &mut Self::Revisions,
|
|
old_fields: *mut Self::Fields<'db>,
|
|
new_fields: Self::Fields<'db>,
|
|
);
|
|
}
|
|
</code></pre>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="query-operations"><a class="header" href="#query-operations">Query operations</a></h1>
|
|
<p>Each of the query storage structs implements the <code>QueryStorageOps</code> trait found in the <a href="https://github.com/salsa-rs/salsa/blob/master/src/plumbing.rs"><code>plumbing</code></a> module:</p>
|
|
<pre><code class="language-rust no_run noplayground">pub trait QueryStorageOps<Q>
|
|
where
|
|
Self: QueryStorageMassOps,
|
|
Q: Query,
|
|
{
|
|
</code></pre>
|
|
<p>which defines the basic operations that all queries support. The most important are these two:</p>
|
|
<ul>
|
|
<li><a href="plumbing/./maybe_changed_after.html">maybe changed after</a>: Returns true if the value of the query (for the given key) may have changed since the given revision.</li>
|
|
<li><a href="plumbing/./fetch.html">Fetch</a>: Returns the up-to-date value for the given K (or an error in the case of an "unrecovered" cycle).</li>
|
|
</ul>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="maybe-changed-after"><a class="header" href="#maybe-changed-after">Maybe changed after</a></h1>
|
|
<pre><code class="language-rust no_run noplayground"> /// True if the value of `input`, which must be from this query, may have
|
|
/// changed after the given revision ended.
|
|
///
|
|
/// This function should only be invoked with a revision less than the current
|
|
/// revision.
|
|
fn maybe_changed_after(
|
|
&self,
|
|
db: &<Q as QueryDb<'_>>::DynDb,
|
|
input: DatabaseKeyIndex,
|
|
revision: Revision,
|
|
) -> bool;
|
|
</code></pre>
|
|
<p>The <code>maybe_changed_after</code> operation computes whether a query's value <em>may have changed</em> <strong>after</strong> the given revision. In other words, <code>Q.maybe_changed_after(R)</code> is true if the value of the query <code>Q</code> may have changed in the revisions <code>(R+1)..R_now</code>, where <code>R_now</code> is the current revision. Note that it doesn't make sense to ask <code>maybe_changed_after(R_now)</code>.</p>
|
|
<h2 id="input-queries"><a class="header" href="#input-queries">Input queries</a></h2>
|
|
<p>Input queries are set explicitly by the user. <code>maybe_changed_after</code> can therefore just check when the value was last set and compare.</p>
|
|
<h2 id="interned-queries"><a class="header" href="#interned-queries">Interned queries</a></h2>
|
|
<h2 id="derived-queries"><a class="header" href="#derived-queries">Derived queries</a></h2>
|
|
<p>The logic for derived queries is more complex. We summarize the high-level ideas here, but you may find the <a href="plumbing/./derived_flowchart.html">flowchart</a> useful to dig deeper. The <a href="plumbing/./terminology.html">terminology</a> section may also be useful; in some cases, we link to that section on the first usage of a word.</p>
|
|
<ul>
|
|
<li>If an existing <a href="plumbing/./terminology/memo.html">memo</a> is found, then we check if the memo was <a href="plumbing/./terminology/verified.html">verified</a> in the current <a href="plumbing/./terminology/revision.html">revision</a>. If so, we can compare its <a href="plumbing/./terminology/changed_at.html">changed at</a> revision and return true or false appropriately.</li>
|
|
<li>Otherwise, we must check whether <a href="plumbing/./terminology/dependency.html">dependencies</a> have been modified:
|
|
<ul>
|
|
<li>Let R be the revision in which the memo was last verified; we wish to know if any of the dependencies have changed since revision R.</li>
|
|
<li>First, we check the <a href="plumbing/./terminology/durability.html">durability</a>. For each memo, we track the minimum durability of the memo's dependencies. If the memo has durability D, and there have been no changes to an input with durability D since the last time the memo was verified, then we can consider the memo verified without any further work.</li>
|
|
<li>If the durability check is not sufficient, then we must check the dependencies individually. For this, we iterate over each dependency D and invoke the <a href="plumbing/./maybe_changed_after.html">maybe changed after</a> operation to check whether D has changed since the revision R.</li>
|
|
<li>If no dependency was modified:
|
|
<ul>
|
|
<li>We can mark the memo as verified and use its <a href="plumbing/./terminology/changed_at.html">changed at</a> revision to return true or false.</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>Assuming dependencies have been modified:
|
|
<ul>
|
|
<li>Then we execute the user's query function (same as in <a href="plumbing/./fetch.html">fetch</a>), which potentially <a href="plumbing/./terminology/backdate.html">backdates</a> the resulting value.</li>
|
|
<li>Compare the <a href="plumbing/./terminology/changed_at.html">changed at</a> revision in the resulting memo and return true or false.</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="fetch"><a class="header" href="#fetch">Fetch</a></h1>
|
|
<pre><code class="language-rust no_run noplayground"> /// Execute the query, returning the result (often, the result
|
|
/// will be memoized). This is the "main method" for
|
|
/// queries.
|
|
///
|
|
/// Returns `Err` in the event of a cycle, meaning that computing
|
|
/// the value for this `key` is recursively attempting to fetch
|
|
/// itself.
|
|
fn fetch(&self, db: &<Q as QueryDb<'_>>::DynDb, key: &Q::Key) -> Q::Value;
|
|
</code></pre>
|
|
<p>The <code>fetch</code> operation computes the value of a query. It prefers to reuse memoized values when it can.</p>
|
|
<h2 id="input-queries-1"><a class="header" href="#input-queries-1">Input queries</a></h2>
|
|
<p>Input queries simply load the result from the table.</p>
|
|
<h2 id="interned-queries-1"><a class="header" href="#interned-queries-1">Interned queries</a></h2>
|
|
<p>Interned queries map the input into a hashmap to find an existing integer. If none is present, a new value is created.</p>
|
|
<h2 id="derived-queries-1"><a class="header" href="#derived-queries-1">Derived queries</a></h2>
|
|
<p>The logic for derived queries is more complex. We summarize the high-level ideas here, but you may find the <a href="plumbing/./derived_flowchart.html">flowchart</a> useful to dig deeper. The <a href="plumbing/./terminology.html">terminology</a> section may also be useful; in some cases, we link to that section on the first usage of a word.</p>
|
|
<ul>
|
|
<li>If an existing <a href="plumbing/./terminology/memo.html">memo</a> is found, then we check if the memo was <a href="plumbing/./terminology/verified.html">verified</a> in the current <a href="plumbing/./terminology/revision.html">revision</a>. If so, we can directly return the memoized value.</li>
|
|
<li>Otherwise, if the memo contains a memoized value, we must check whether <a href="plumbing/./terminology/dependency.html">dependencies</a> have been modified:
|
|
<ul>
|
|
<li>Let R be the revision in which the memo was last verified; we wish to know if any of the dependencies have changed since revision R.</li>
|
|
<li>First, we check the <a href="plumbing/./terminology/durability.html">durability</a>. For each memo, we track the minimum durability of the memo's dependencies. If the memo has durability D, and there have been no changes to an input with durability D since the last time the memo was verified, then we can consider the memo verified without any further work.</li>
|
|
<li>If the durability check is not sufficient, then we must check the dependencies individually. For this, we iterate over each dependency D and invoke the <a href="plumbing/./maybe_changed_after.html">maybe changed after</a> operation to check whether D has changed since the revision R.</li>
|
|
<li>If no dependency was modified:
|
|
<ul>
|
|
<li>We can mark the memo as verified and return its memoized value.</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>Assuming dependencies have been modified or the memo does not contain a memoized value:
|
|
<ul>
|
|
<li>Then we execute the user's query function.</li>
|
|
<li>Next, we compute the revision in which the memoized value last changed:
|
|
<ul>
|
|
<li><em>Backdate:</em> If there was a previous memoized value, and the new value is equal to that old value, then we can <em>backdate</em> the memo, which means to use the 'changed at' revision from before.
|
|
<ul>
|
|
<li>Thanks to backdating, it is possible for a dependency of the query to have changed in some revision R1 but for the <em>output</em> of the query to have changed in some revision R2 where R2 predates R1.</li>
|
|
</ul>
|
|
</li>
|
|
<li>Otherwise, we use the current revision.</li>
|
|
</ul>
|
|
</li>
|
|
<li>Construct a memo for the new value and return it.</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="derived-queries-flowchart"><a class="header" href="#derived-queries-flowchart">Derived queries flowchart</a></h1>
|
|
<p>Derived queries are by far the most complex. This flowchart documents the flow of the <a href="plumbing/./maybe_changed_after.html">maybe changed after</a> and <a href="plumbing/./fetch.html">fetch</a> operations. This flowchart can be edited on <a href="https://draw.io">draw.io</a>:</p>
|
|
<!-- The explicit div is there because, otherwise, the flowchart is unreadable when using "dark mode" -->
|
|
<div style="background-color:white;">
|
|
<p><img src="plumbing/../derived-query-read.drawio.svg" alt="Flowchart" /></p>
|
|
</div>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="cycles"><a class="header" href="#cycles">Cycles</a></h1>
|
|
<h2 id="cross-thread-blocking"><a class="header" href="#cross-thread-blocking">Cross-thread blocking</a></h2>
|
|
<p>The interface for blocking across threads now works as follows:</p>
|
|
<ul>
|
|
<li>When one thread <code>T1</code> wishes to block on a query <code>Q</code> being executed by another thread <code>T2</code>, it invokes <code>Runtime::try_block_on</code>. This will check for cycles. Assuming no cycle is detected, it will block <code>T1</code> until <code>T2</code> has completed with <code>Q</code>. At that point, <code>T1</code> reawakens. However, we don't know the result of executing <code>Q</code>, so <code>T1</code> now has to "retry". Typically, this will result in successfully reading the cached value.</li>
|
|
<li>While <code>T1</code> is blocking, the runtime moves its query stack (a <code>Vec</code>) into the shared dependency graph data structure. When <code>T1</code> reawakens, it recovers ownership of its query stack before returning from <code>try_block_on</code>.</li>
|
|
</ul>
|
|
<h2 id="cycle-detection"><a class="header" href="#cycle-detection">Cycle detection</a></h2>
|
|
<p>When a thread <code>T1</code> attempts to execute a query <code>Q</code>, it will try to load the value for <code>Q</code> from the memoization tables. If it finds an <code>InProgress</code> marker, that indicates that <code>Q</code> is currently being computed. This indicates a potential cycle. <code>T1</code> will then try to block on the query <code>Q</code>:</p>
|
|
<ul>
|
|
<li>If <code>Q</code> is also being computed by <code>T1</code>, then there is a cycle.</li>
|
|
<li>Otherwise, if <code>Q</code> is being computed by some other thread <code>T2</code>, we have to check whether <code>T2</code> is (transitively) blocked on <code>T1</code>. If so, there is a cycle.</li>
|
|
</ul>
|
|
<p>These two cases are handled internally by the <code>Runtime::try_block_on</code> function. Detecting the intra-thread cycle case is easy; to detect cross-thread cycles, the runtime maintains a dependency DAG between threads (identified by <code>RuntimeId</code>). Before adding an edge <code>T1 -> T2</code> (i.e., <code>T1</code> is blocked waiting for <code>T2</code>) into the DAG, it checks whether a path exists from <code>T2</code> to <code>T1</code>. If so, we have a cycle and the edge cannot be added (then the DAG would no longer be acyclic).</p>
|
|
<p>When a cycle is detected, the current thread <code>T1</code> has full access to the query stacks that are participating in the cycle. Consider: naturally, <code>T1</code> has access to its own stack. There is also a path <code>T2 -> ... -> Tn -> T1</code> of blocked threads. Each of the blocked threads <code>T2 ..= Tn</code> will have moved their query stacks into the dependency graph, so those query stacks are available for inspection.</p>
|
|
<p>Using the available stacks, we can create a list of cycle participants <code>Q0 ... Qn</code> and store that into a <code>Cycle</code> struct. If none of the participants <code>Q0 ... Qn</code> have cycle recovery enabled, we panic with the <code>Cycle</code> struct, which will trigger all the queries on this thread to panic.</p>
|
|
<h2 id="cycle-recovery-via-fallback"><a class="header" href="#cycle-recovery-via-fallback">Cycle recovery via fallback</a></h2>
|
|
<p>If any of the cycle participants <code>Q0 ... Qn</code> has cycle recovery set, we recover from the cycle. To help explain how this works, we will use this example cycle which contains three threads. Beginning with the current query, the cycle participants are <code>QA3</code>, <code>QB2</code>, <code>QB3</code>, <code>QC2</code>, <code>QC3</code>, and <code>QA2</code>.</p>
|
|
<pre><code> The cyclic
|
|
edge we have
|
|
failed to add.
|
|
:
|
|
A : B C
|
|
:
|
|
QA1 v QB1 QC1
|
|
┌► QA2 ┌──► QB2 ┌─► QC2
|
|
│ QA3 ───┘ QB3 ──┘ QC3 ───┐
|
|
│ │
|
|
└───────────────────────────────┘
|
|
</code></pre>
|
|
<p>Recovery works in phases:</p>
|
|
<ul>
|
|
<li><strong>Analyze:</strong> As we enumerate the query participants, we collect their collective inputs (all queries invoked so far by any cycle participant) and the max changed-at and min durability. We then remove the cycle participants themselves from this list of inputs, leaving only the queries external to the cycle.</li>
|
|
<li><strong>Mark</strong>: For each query Q that is annotated with <code>#[salsa::cycle]</code>, we mark it and all of its successors on the same thread by setting its <code>cycle</code> flag to the <code>c: Cycle</code> we constructed earlier; we also reset its inputs to the collective inputs gathered during analysis. If those queries resume execution later, those marks will trigger them to immediately unwind and use cycle recovery, and the inputs will be used as the inputs to the recovery value.
|
|
<ul>
|
|
<li>Note that we mark <em>all</em> the successors of Q on the same thread, whether or not they have recovery set. We'll discuss later how this is important in the case where the active thread (A, here) doesn't have any recovery set.</li>
|
|
</ul>
|
|
</li>
|
|
<li><strong>Unblock</strong>: Each blocked thread T that has a recovering query is forcibly reawoken; the outgoing edge from that thread to its successor in the cycle is removed. Its condvar is signalled with a <code>WaitResult::Cycle(c)</code>. When the thread reawakens, it will see that and start unwinding with the cycle <code>c</code>.</li>
|
|
<li><strong>Handle the current thread:</strong> Finally, we have to choose how to have the current thread proceed. If the current thread includes any query with recovery information, then we can begin unwinding. Otherwise, the current thread simply continues as if there had been no cycle, and so the cyclic edge is added to the graph and the current thread blocks. This is possible because some other thread had recovery information and therefore has been awoken.</li>
|
|
</ul>
|
|
<p>Let's walk through the process with a few examples.</p>
|
|
<h3 id="example-1-recovery-on-the-detecting-thread"><a class="header" href="#example-1-recovery-on-the-detecting-thread">Example 1: Recovery on the detecting thread</a></h3>
|
|
<p>Consider the case where only the query QA2 has recovery set. It and QA3 will be marked with their <code>cycle</code> flag set to <code>c: Cycle</code>. Threads B and C will not be unblocked, as they do not have any cycle recovery nodes. The current thread (Thread A) will initiate unwinding with the cycle <code>c</code> as the value. Unwinding will pass through QA3 and be caught by QA2. QA2 will substitute the recovery value and return normally. QA1 and QC3 will then complete normally and so forth, on up until all queries have completed.</p>
|
|
<h3 id="example-2-recovery-in-two-queries-on-the-detecting-thread"><a class="header" href="#example-2-recovery-in-two-queries-on-the-detecting-thread">Example 2: Recovery in two queries on the detecting thread</a></h3>
|
|
<p>Consider the case where both queries QA2 and QA3 have recovery set. It proceeds the same as Example 1 until the current thread initiates unwinding. When QA3 receives the cycle, it stores its recovery value and completes normally. QA2 then adds QA3 as an input dependency: at that point, QA2 observes that it too has the cycle mark set, and so it initiates unwinding. The rest of QA2 therefore never executes. This unwinding is caught by QA2's entry point and it stores the recovery value and returns normally. QA1 and QC3 then continue normally, as they have not had their <code>cycle</code> flag set.</p>
|
|
<h3 id="example-3-recovery-on-another-thread"><a class="header" href="#example-3-recovery-on-another-thread">Example 3: Recovery on another thread</a></h3>
|
|
<p>Now consider the case where only the query QB2 has recovery set. It and QB3 will be marked with the cycle <code>c: Cycle</code> and thread B will be unblocked; the edge <code>QB3 -> QC2</code> will be removed from the dependency graph. Thread A will then add an edge <code>QA3 -> QB2</code> and block on thread B. At that point, thread A releases the lock on the dependency graph, and so thread B is re-awoken. It observes the <code>WaitResult::Cycle</code> and initiates unwinding. Unwinding proceeds through QB3 and into QB2, which recovers. QB1 is then able to execute normally, as is QA3, and execution proceeds from there.</p>
|
|
<h3 id="example-4-recovery-on-all-queries"><a class="header" href="#example-4-recovery-on-all-queries">Example 4: Recovery on all queries</a></h3>
|
|
<p>Now consider the case where all the queries have recovery set. In that case, they are all marked with the cycle, and all the cross-thread edges are removed from the graph. Each thread will independently awaken and initiate unwinding. Each query will recover.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="terminology"><a class="header" href="#terminology">Terminology</a></h1>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="backdate"><a class="header" href="#backdate">Backdate</a></h1>
|
|
<p><em>Backdating</em> is when we mark a value that was computed in revision R as having last changed in some earlier revision. This is done when we have an older <a href="plumbing/terminology/./memo.html">memo</a> M and we can compare the two values to see that, while the <a href="plumbing/terminology/./dependency.html">dependencies</a> to M may have changed, the result of the <a href="plumbing/terminology/./query_function.html">query function</a> did not.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="changed-at"><a class="header" href="#changed-at">Changed at</a></h1>
|
|
<p>The <em>changed at</em> revision for a <a href="plumbing/terminology/./memo.html">memo</a> is the <a href="plumbing/terminology/./revision.html">revision</a> in which that memo's value last changed. Typically, this is the same as the revision in which the <a href="plumbing/terminology/./query_function.html">query function</a> was last executed, but it may be an earlier revision if the memo was <a href="plumbing/terminology/./backdate.html">backdated</a>.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="dependency"><a class="header" href="#dependency">Dependency</a></h1>
|
|
<p>A <em>dependency</em> of a <a href="plumbing/terminology/./query.html">query</a> Q is some other query Q1 that was invoked as part of computing the value for Q (typically, invoked by Q's <a href="plumbing/terminology/./query_function.html">query function</a>).</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="derived-query"><a class="header" href="#derived-query">Derived query</a></h1>
|
|
<p>A <em>derived query</em> is a <a href="plumbing/terminology/./query.html">query</a> whose value is defined by the result of a user-provided <a href="plumbing/terminology/./query_function.html">query function</a>. That function is executed to get the result of the query. Unlike <a href="plumbing/terminology/./input_query.html">input queries</a>, the result of a derived query can always be recomputed whenever needed simply by re-executing the function.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="durability-1"><a class="header" href="#durability-1">Durability</a></h1>
|
|
<p><em>Durability</em> is an optimization that we use to avoid checking the <a href="plumbing/terminology/./dependency.html">dependencies</a> of a <a href="plumbing/terminology/./query.html">query</a> individually.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="input-query"><a class="header" href="#input-query">Input query</a></h1>
|
|
<p>An <em>input query</em> is a <a href="plumbing/terminology/./query.html">query</a> whose value is explicitly set by the user. When that value is set, a <a href="plumbing/terminology/./durability.html">durability</a> can also be provided.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="ingredient"><a class="header" href="#ingredient">Ingredient</a></h1>
|
|
<p>An <em>ingredient</em> is an individual piece of storage used to create a <a href="plumbing/terminology/./salsa_item.html">salsa item</a>.
|
|
See the <a href="plumbing/terminology/../jars_and_ingredients.html">jars and ingredients</a> chapter for more details.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="lru"><a class="header" href="#lru">LRU</a></h1>
|
|
<p>The <a href="https://docs.rs/salsa/0.16.1/salsa/struct.QueryTableMut.html#method.set_lru_capacity"><code>set_lru_capacity</code></a> method can be used to fix the maximum capacity for a query at a specific number of values. If more values are added after that point, then salsa will drop the values from older <a href="plumbing/terminology/./memo.html">memos</a> to conserve memory (we always retain the <a href="plumbing/terminology/./dependency.html">dependency</a> information for those memos, however, so that we can still compute whether values may have changed, even if we don't know what that value is).</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="memo"><a class="header" href="#memo">Memo</a></h1>
|
|
<p>A <em>memo</em> stores information about the last time that a <a href="plumbing/terminology/./query_function.html">query function</a> for some <a href="plumbing/terminology/./query.html">query</a> Q was executed:</p>
|
|
<ul>
|
|
<li>Typically, it contains the value that was returned from that function, so that we don't have to execute it again.
|
|
<ul>
|
|
<li>However, this is not always true: some queries don't cache their result values, and values can also be dropped as a result of <a href="plumbing/terminology/./LRU.html">LRU</a> collection. In those cases, the memo just stores <a href="plumbing/terminology/./dependency.html">dependency</a> information, which can still be useful to determine if other queries that have Q as a <a href="plumbing/terminology/./dependency.html">dependency</a> may have changed.</li>
|
|
</ul>
|
|
</li>
|
|
<li>The revision in which the memo was last <a href="plumbing/terminology/./verified.html">verified</a>.</li>
|
|
<li>The <a href="plumbing/terminology/./changed_at.html">changed at</a> revision in which the memo's value last changed. (Note that it may be <a href="plumbing/terminology/./backdate.html">backdated</a>.)</li>
|
|
<li>The minimum durability of the memo's <a href="plumbing/terminology/./dependency.html">dependencies</a>.</li>
|
|
<li>The complete set of <a href="plumbing/terminology/./dependency.html">dependencies</a>, if available, or a marker that the memo has an <a href="plumbing/terminology/./untracked.html">untracked dependency</a>.</li>
|
|
</ul>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="query"><a class="header" href="#query">Query</a></h1>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="query-function"><a class="header" href="#query-function">Query function</a></h1>
|
|
<p>The <em>query function</em> is the user-provided function that we execute to compute the value of a <a href="plumbing/terminology/./derived_query.html">derived query</a>. Salsa assumes that all query functions are 'pure' functions of their <a href="plumbing/terminology/./dependency.html">dependencies</a> unless the user reports an <a href="plumbing/terminology/./untracked.html">untracked read</a>. Salsa always assumes that functions have no important side-effects (i.e., that they don't send messages over the network whose results you wish to observe) and thus that it doesn't have to re-execute functions unless it needs their return value.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="revision"><a class="header" href="#revision">Revision</a></h1>
|
|
<p>A <em>revision</em> is a monotonically increasing integer that we use to track the "version" of the database. Each time the value of an <a href="plumbing/terminology/./input_query.html">input query</a> is modified, we create a new revision.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="salsa-item"><a class="header" href="#salsa-item">Salsa item</a></h1>
|
|
<p>A salsa item is something that is decorated with a <code>#[salsa::foo]</code> macro, like a tracked function or struct.
|
|
See the <a href="plumbing/terminology/../jars_and_ingredients.html">jars and ingredients</a> chapter for more details.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="salsa-struct"><a class="header" href="#salsa-struct">Salsa struct</a></h1>
|
|
<p>A salsa struct is a struct decorated with one of the salsa macros:</p>
|
|
<ul>
|
|
<li><code>#[salsa::tracked]</code></li>
|
|
<li><code>#[salsa::input]</code></li>
|
|
<li><code>#[salsa::interned]</code></li>
|
|
</ul>
|
|
<p>See the <a href="plumbing/terminology/../../overview.html">salsa overview</a> for more details.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="untracked-dependency"><a class="header" href="#untracked-dependency">Untracked dependency</a></h1>
|
|
<p>An <em>untracked dependency</em> is an indication that the result of a <a href="plumbing/terminology/./derived_query.html">derived query</a> depends on something not visible to the salsa database. Untracked dependencies are created by invoking <a href="https://docs.rs/salsa/0.16.1/salsa/struct.Runtime.html#method.report_untracked_read"><code>report_untracked_read</code></a> or <a href="https://docs.rs/salsa/0.16.1/salsa/struct.Runtime.html#method.report_synthetic_read"><code>report_synthetic_read</code></a>. When an untracked dependency is present, <a href="plumbing/terminology/./derived_query.html">derived queries</a> are always re-executed if the durability check fails (see the description of the <a href="plumbing/terminology/../fetch.html#derived-queries">fetch operation</a> for more details).</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="verified"><a class="header" href="#verified">Verified</a></h1>
|
|
<p>A <a href="plumbing/terminology/./memo.html">memo</a> is <em>verified</em> in a revision R if we have checked that its value is still up-to-date (i.e., if we were to reexecute the <a href="plumbing/terminology/./query_function.html">query function</a>, we are guaranteed to get the same result). Each memo tracks the revision in which it was last verified to avoid repeatedly checking whether dependencies have changed during the <a href="plumbing/terminology/../fetch.html">fetch</a> and <a href="plumbing/terminology/../maybe_changed_after.html">maybe changed after</a> operations.</p>
|
|
<div style="break-before: page; page-break-before: always;"></div><h1 id="meta-about-the-book-itself"><a class="header" href="#meta-about-the-book-itself">Meta: about the book itself</a></h1>
|
|
<h2 id="linking-policy"><a class="header" href="#linking-policy">Linking policy</a></h2>
|
|
<p>We try to avoid links that easily become fragile. </p>
|
|
<p><strong>Do:</strong></p>
|
|
<ul>
|
|
<li>Link to <code>docs.rs</code> types to document the public API, but modify the link to use <code>latest</code> as the version.</li>
|
|
<li>Link to modules in the source code.</li>
|
|
<li>Create <a href="https://rust-lang.github.io/mdBook/format/mdbook.html?highlight=ANCHOR#including-portions-of-a-file">"named anchors"</a> and embed source code directly.</li>
|
|
</ul>
|
|
<p><strong>Don't:</strong></p>
|
|
<ul>
|
|
<li>Link to direct lines on github, even within a specific commit, unless you are trying to reference a historical piece of code ("how things were at the time").</li>
|
|
</ul>
|
|
|
|
</main>
|
|
|
|
<nav class="nav-wrapper" aria-label="Page navigation">
|
|
<!-- Mobile navigation buttons -->
|
|
|
|
|
|
<div style="clear: both"></div>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
|
|
<nav class="nav-wide-wrapper" aria-label="Page navigation">
|
|
|
|
</nav>
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<script type="text/javascript">
|
|
window.playground_copyable = true;
|
|
</script>
|
|
|
|
|
|
<script src="elasticlunr.min.js" type="text/javascript" charset="utf-8"></script>
|
|
<script src="mark.min.js" type="text/javascript" charset="utf-8"></script>
|
|
<script src="searcher.js" type="text/javascript" charset="utf-8"></script>
|
|
|
|
<script src="clipboard.min.js" type="text/javascript" charset="utf-8"></script>
|
|
<script src="highlight.js" type="text/javascript" charset="utf-8"></script>
|
|
<script src="book.js" type="text/javascript" charset="utf-8"></script>
|
|
|
|
<!-- Custom JS scripts -->
|
|
<script type="text/javascript" src="mermaid.min.js"></script>
|
|
<script type="text/javascript" src="mermaid-init.js"></script>
|
|
|
|
<script type="text/javascript">
|
|
window.addEventListener('load', function() {
|
|
window.setTimeout(window.print, 100);
|
|
});
|
|
</script>
|
|
|
|
</body>
|
|
</html>
|