Restructure handling of changed regions when reparsing

This commit is contained in:
Max Brunsfeld 2022-08-21 11:36:17 -07:00
parent 5209e2d68c
commit e8548e7732

View file

@ -2,14 +2,12 @@ use crate::{
Grammar, InjectionConfig, Language, LanguageRegistry, QueryCursorHandle, TextProvider, Grammar, InjectionConfig, Language, LanguageRegistry, QueryCursorHandle, TextProvider,
ToTreeSitterPoint, ToTreeSitterPoint,
}; };
use collections::HashMap;
use std::{ use std::{
borrow::Cow, cell::RefCell, cmp::Ordering, collections::BinaryHeap, ops::Range, sync::Arc, borrow::Cow, cell::RefCell, cmp::Ordering, collections::BinaryHeap, ops::Range, sync::Arc,
}; };
use sum_tree::{Bias, SeekTarget, SumTree}; use sum_tree::{Bias, SeekTarget, SumTree};
use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint}; use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint};
use tree_sitter::{Parser, Tree}; use tree_sitter::{Parser, Tree};
use util::post_inc;
thread_local! { thread_local! {
static PARSER: RefCell<Parser> = RefCell::new(Parser::new()); static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
@ -42,28 +40,26 @@ struct SyntaxLayerSummary {
last_layer_range: Range<Anchor>, last_layer_range: Range<Anchor>,
} }
#[derive(Clone, Debug)] #[derive(Debug)]
struct Depth(usize); struct DepthAndRange(usize, Range<Anchor>);
#[derive(Clone, Debug)] #[derive(Debug)]
struct MaxPosition(Anchor); struct DepthAndMaxPosition(usize, Anchor);
enum ReparseStep { #[derive(Debug)]
CreateLayer { struct DepthAndRangeOrMaxPosition(usize, Range<Anchor>, Anchor);
struct ReparseStep {
depth: usize, depth: usize,
language: Arc<Language>, language: Arc<Language>,
ranges: Vec<tree_sitter::Range>, ranges: Vec<tree_sitter::Range>,
}, range: Range<Anchor>,
EnterChangedRange { }
id: usize,
#[derive(Debug, PartialEq, Eq)]
struct ChangedRegion {
depth: usize, depth: usize,
range: Range<usize>, range: Range<Anchor>,
},
LeaveChangedRange {
id: usize,
depth: usize,
range: Range<usize>,
},
} }
impl SyntaxMap { impl SyntaxMap {
@ -130,7 +126,16 @@ impl SyntaxSnapshot {
for depth in 0..=max_depth { for depth in 0..=max_depth {
let mut edits = &edits[..]; let mut edits = &edits[..];
layers.push_tree(cursor.slice(&Depth(depth), Bias::Left, text), text); if cursor.start().max_depth < depth {
layers.push_tree(
cursor.slice(
&DepthAndRange(depth, Anchor::MIN..Anchor::MAX),
Bias::Left,
text,
),
text,
);
}
while let Some(layer) = cursor.item() { while let Some(layer) = cursor.item() {
let mut endpoints = text.summaries_for_anchors::<(usize, Point), _>([ let mut endpoints = text.summaries_for_anchors::<(usize, Point), _>([
@ -150,10 +155,7 @@ impl SyntaxSnapshot {
if first_edit.new.start.0 > layer_range.end.0 { if first_edit.new.start.0 > layer_range.end.0 {
layers.push_tree( layers.push_tree(
cursor.slice( cursor.slice(
&( &DepthAndMaxPosition(depth, text.anchor_before(first_edit.new.start.0)),
Depth(depth),
MaxPosition(text.anchor_before(first_edit.new.start.0)),
),
Bias::Left, Bias::Left,
text, text,
), ),
@ -183,8 +185,8 @@ impl SyntaxSnapshot {
} }
// Apply any edits that intersect this layer to the layer's syntax tree. // Apply any edits that intersect this layer to the layer's syntax tree.
if edit.new.start.0 >= start_byte { let tree_edit = if edit.new.start.0 >= start_byte {
layer.tree.edit(&tree_sitter::InputEdit { tree_sitter::InputEdit {
start_byte: edit.new.start.0 - start_byte, start_byte: edit.new.start.0 - start_byte,
old_end_byte: edit.new.start.0 - start_byte old_end_byte: edit.new.start.0 - start_byte
+ (edit.old.end.0 - edit.old.start.0), + (edit.old.end.0 - edit.old.start.0),
@ -194,16 +196,20 @@ impl SyntaxSnapshot {
+ (edit.old.end.1 - edit.old.start.1)) + (edit.old.end.1 - edit.old.start.1))
.to_ts_point(), .to_ts_point(),
new_end_position: (edit.new.end.1 - start_point).to_ts_point(), new_end_position: (edit.new.end.1 - start_point).to_ts_point(),
}); }
} else { } else {
layer.tree.edit(&tree_sitter::InputEdit { tree_sitter::InputEdit {
start_byte: 0, start_byte: 0,
old_end_byte: edit.new.end.0 - start_byte, old_end_byte: edit.new.end.0 - start_byte,
new_end_byte: 0, new_end_byte: 0,
start_position: Default::default(), start_position: Default::default(),
old_end_position: (edit.new.end.1 - start_point).to_ts_point(), old_end_position: (edit.new.end.1 - start_point).to_ts_point(),
new_end_position: Default::default(), new_end_position: Default::default(),
}); }
};
layer.tree.edit(&tree_edit);
if edit.new.start.0 < start_byte {
break; break;
} }
} }
@ -228,23 +234,67 @@ impl SyntaxSnapshot {
cursor.next(&text); cursor.next(&text);
let mut layers = SumTree::new(); let mut layers = SumTree::new();
let mut next_change_id = 0; let mut changed_regions = Vec::<ChangedRegion>::new();
let mut current_changes = HashMap::default();
let mut queue = BinaryHeap::new(); let mut queue = BinaryHeap::new();
queue.push(ReparseStep::CreateLayer { queue.push(ReparseStep {
depth: 0, depth: 0,
language: language.clone(), language: language.clone(),
ranges: Vec::new(), ranges: Vec::new(),
range: Anchor::MIN..Anchor::MAX,
}); });
while let Some(step) = queue.pop() { loop {
match step { let step = queue.pop();
ReparseStep::CreateLayer { let (depth, range) = if let Some(step) = &step {
depth, (step.depth, step.range.clone())
language, } else {
ranges, (cursor.start().max_depth, Anchor::MAX..Anchor::MAX)
} => { };
let range;
let target = DepthAndRange(depth, range.clone());
if target.cmp(cursor.start(), &text).is_gt() {
let change_start_anchor = changed_regions
.first()
.map_or(Anchor::MAX, |region| region.range.start);
let seek_target =
DepthAndRangeOrMaxPosition(depth, range.clone(), change_start_anchor);
let slice = cursor.slice(&seek_target, Bias::Left, text);
layers.push_tree(slice, &text);
while let Some(layer) = cursor.item() {
if target.cmp(&cursor.end(text), text).is_le() {
break;
}
if layer_is_changed(layer, text, &changed_regions) {
let region = ChangedRegion {
depth: depth + 1,
range: layer.range.clone(),
};
if let Err(i) =
changed_regions.binary_search_by(|probe| probe.cmp(&region, text))
{
changed_regions.insert(i, region);
}
} else {
layers.push(layer.clone(), text);
}
cursor.next(text);
}
changed_regions.retain(|region| {
region.depth > depth
|| (region.depth == depth
&& region.range.end.cmp(&range.start, text).is_gt())
});
}
let (ranges, language) = if let Some(step) = step {
(step.ranges, step.language)
} else {
break;
};
let start_point; let start_point;
let start_byte; let start_byte;
let end_byte; let end_byte;
@ -252,35 +302,12 @@ impl SyntaxSnapshot {
start_point = first.start_point; start_point = first.start_point;
start_byte = first.start_byte; start_byte = first.start_byte;
end_byte = last.end_byte; end_byte = last.end_byte;
range = text.anchor_before(start_byte)..text.anchor_after(end_byte);
} else { } else {
start_point = Point::zero().to_ts_point(); start_point = Point::zero().to_ts_point();
start_byte = 0; start_byte = 0;
end_byte = text.len(); end_byte = text.len();
range = Anchor::MIN..Anchor::MAX;
}; };
let target = (Depth(depth), range.clone());
if target.cmp(cursor.start(), &text).is_gt() {
if current_changes.is_empty() {
let slice = cursor.slice(&target, Bias::Left, text);
layers.push_tree(slice, &text);
} else {
while let Some(layer) = cursor.item() {
if layer.depth > depth
|| layer.depth == depth
&& layer.range.start.cmp(&range.end, text).is_ge()
{
break;
}
if !layer_is_changed(layer, text, &current_changes) {
layers.push(layer.clone(), text);
}
cursor.next(text);
}
}
}
let mut old_layer = cursor.item(); let mut old_layer = cursor.item();
if let Some(layer) = old_layer { if let Some(layer) = old_layer {
if layer.range.to_offset(text) == (start_byte..end_byte) { if layer.range.to_offset(text) == (start_byte..end_byte) {
@ -331,22 +358,18 @@ impl SyntaxSnapshot {
changed_ranges.is_empty(), changed_ranges.is_empty(),
) { ) {
let depth = depth + 1; let depth = depth + 1;
queue.extend(changed_ranges.iter().flat_map(|range| {
let id = post_inc(&mut next_change_id); for range in &changed_ranges {
let range = start_byte + range.start..start_byte + range.end; let region = ChangedRegion {
[
ReparseStep::EnterChangedRange {
id,
depth, depth,
range: range.clone(), range: text.anchor_before(range.start)..text.anchor_after(range.end),
}, };
ReparseStep::LeaveChangedRange { if let Err(i) =
id, changed_regions.binary_search_by(|probe| probe.cmp(&region, text))
depth, {
range: range.clone(), changed_regions.insert(i, region);
}, }
] }
}));
get_injections( get_injections(
config, config,
@ -361,51 +384,7 @@ impl SyntaxSnapshot {
); );
} }
} }
ReparseStep::EnterChangedRange { id, depth, range } => {
let range = text.anchor_before(range.start)..text.anchor_after(range.end);
if current_changes.is_empty() {
let target = (Depth(depth), range.start..Anchor::MAX);
let slice = cursor.slice(&target, Bias::Left, text);
layers.push_tree(slice, text);
} else {
while let Some(layer) = cursor.item() {
if layer.depth > depth
|| layer.depth == depth
&& layer.range.end.cmp(&range.start, text).is_gt()
{
break;
}
if !layer_is_changed(layer, text, &current_changes) {
layers.push(layer.clone(), text);
}
cursor.next(text);
}
}
current_changes.insert(id, range);
}
ReparseStep::LeaveChangedRange { id, depth, range } => {
let range = text.anchor_before(range.start)..text.anchor_after(range.end);
while let Some(layer) = cursor.item() {
if layer.depth > depth
|| layer.depth == depth
&& layer.range.start.cmp(&range.end, text).is_ge()
{
break;
}
if !layer_is_changed(layer, text, &current_changes) {
layers.push(layer.clone(), text);
}
cursor.next(text);
}
current_changes.remove(&id);
}
}
}
let slice = cursor.suffix(&text);
layers.push_tree(slice, &text);
drop(cursor); drop(cursor);
self.layers = layers; self.layers = layers;
} }
@ -512,7 +491,7 @@ fn get_injections(
start_byte: usize, start_byte: usize,
start_point: Point, start_point: Point,
query_ranges: &[Range<usize>], query_ranges: &[Range<usize>],
stack: &mut BinaryHeap<ReparseStep>, queue: &mut BinaryHeap<ReparseStep>,
) -> bool { ) -> bool {
let mut result = false; let mut result = false;
let mut query_cursor = QueryCursorHandle::new(); let mut query_cursor = QueryCursorHandle::new();
@ -547,7 +526,7 @@ fn get_injections(
continue; continue;
} }
} }
prev_match = Some((mat.pattern_index, content_range)); prev_match = Some((mat.pattern_index, content_range.clone()));
let language_name = config.languages_by_pattern_ix[mat.pattern_index] let language_name = config.languages_by_pattern_ix[mat.pattern_index]
.as_ref() .as_ref()
@ -566,10 +545,13 @@ fn get_injections(
if let Some(language_name) = language_name { if let Some(language_name) = language_name {
if let Some(language) = language_registry.get_language(language_name.as_ref()) { if let Some(language) = language_registry.get_language(language_name.as_ref()) {
result = true; result = true;
stack.push(ReparseStep::CreateLayer { let range = text.anchor_before(content_range.start)
..text.anchor_after(content_range.end);
queue.push(ReparseStep {
depth, depth,
language, language,
ranges: content_ranges, ranges: content_ranges,
range,
}) })
} }
} }
@ -581,11 +563,11 @@ fn get_injections(
fn layer_is_changed( fn layer_is_changed(
layer: &SyntaxLayer, layer: &SyntaxLayer,
text: &BufferSnapshot, text: &BufferSnapshot,
changed_ranges: &HashMap<usize, Range<Anchor>>, changed_regions: &[ChangedRegion],
) -> bool { ) -> bool {
changed_ranges.values().any(|range| { changed_regions.iter().any(|region| {
let is_before_layer = range.end.cmp(&layer.range.start, text).is_le(); let is_before_layer = region.range.end.cmp(&layer.range.start, text).is_le();
let is_after_layer = range.start.cmp(&layer.range.end, text).is_ge(); let is_after_layer = region.range.start.cmp(&layer.range.end, text).is_ge();
!is_before_layer && !is_after_layer !is_before_layer && !is_after_layer
}) })
} }
@ -598,22 +580,6 @@ impl std::ops::Deref for SyntaxMap {
} }
} }
impl ReparseStep {
// Builds the `(depth, byte range)` key that this step is ordered by.
fn sort_key(&self) -> (usize, Range<usize>) {
match self {
// A layer is keyed by the span from its first range's start to its last
// range's end; with no ranges at all the key span is `0..usize::MAX`.
ReparseStep::CreateLayer { depth, ranges, .. } => (
*depth,
ranges.first().map_or(0, |r| r.start_byte)
..ranges.last().map_or(usize::MAX, |r| r.end_byte),
),
// Entering a changed range is keyed at the range's start offset.
ReparseStep::EnterChangedRange { depth, range, .. } => {
(*depth, range.start..usize::MAX)
}
// Leaving a changed range is keyed at the range's end offset.
ReparseStep::LeaveChangedRange { depth, range, .. } => (*depth, range.end..usize::MAX),
}
}
}
impl PartialEq for ReparseStep { impl PartialEq for ReparseStep {
fn eq(&self, _: &Self) -> bool { fn eq(&self, _: &Self) -> bool {
false false
@ -630,14 +596,32 @@ impl PartialOrd for ReparseStep {
impl Ord for ReparseStep { impl Ord for ReparseStep {
fn cmp(&self, other: &Self) -> Ordering { fn cmp(&self, other: &Self) -> Ordering {
let (depth_a, range_a) = self.sort_key(); let range_a = self.range();
let (depth_b, range_b) = other.sort_key(); let range_b = other.range();
Ord::cmp(&depth_b, &depth_a) Ord::cmp(&other.depth, &self.depth)
.then_with(|| Ord::cmp(&range_b.start, &range_a.start)) .then_with(|| Ord::cmp(&range_b.start, &range_a.start))
.then_with(|| Ord::cmp(&range_a.end, &range_b.end)) .then_with(|| Ord::cmp(&range_a.end, &range_b.end))
} }
} }
impl ReparseStep {
    /// Returns the byte span covered by this step's `ranges`: from the start
    /// byte of the first range to the end byte of the last range.
    ///
    /// When `ranges` is empty both bounds fall back to zero, giving `0..0`.
    fn range(&self) -> Range<usize> {
        let first_start = match self.ranges.first() {
            Some(first) => first.start_byte,
            None => 0,
        };
        let last_end = match self.ranges.last() {
            Some(last) => last.end_byte,
            None => 0,
        };
        Range {
            start: first_start,
            end: last_end,
        }
    }
}
impl ChangedRegion {
    /// Compares two changed regions, resolving anchors against `buffer`.
    ///
    /// Ordering is by ascending depth, then ascending range start, then
    /// *descending* range end — so among regions with the same depth and
    /// start, the one that ends later sorts first.
    fn cmp(&self, other: &Self, buffer: &BufferSnapshot) -> Ordering {
        match Ord::cmp(&self.depth, &other.depth) {
            Ordering::Equal => {}
            unequal => return unequal,
        }

        match self.range.start.cmp(&other.range.start, buffer) {
            Ordering::Equal => {}
            unequal => return unequal,
        }

        // Operands are swapped on purpose: a later end compares as "less".
        other.range.end.cmp(&self.range.end, buffer)
    }
}
impl Default for SyntaxLayerSummary { impl Default for SyntaxLayerSummary {
fn default() -> Self { fn default() -> Self {
Self { Self {
@ -666,24 +650,9 @@ impl sum_tree::Summary for SyntaxLayerSummary {
} }
} }
impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for Depth { impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndRange {
fn cmp(&self, cursor_location: &SyntaxLayerSummary, _: &BufferSnapshot) -> Ordering {
Ord::cmp(&self.0, &cursor_location.max_depth)
}
}
impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for (Depth, MaxPosition) {
fn cmp(&self, cursor_location: &SyntaxLayerSummary, text: &BufferSnapshot) -> Ordering {
self.0
.cmp(&cursor_location, text)
.then_with(|| (self.1).0.cmp(&cursor_location.range.end, text))
}
}
impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for (Depth, Range<Anchor>) {
fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering { fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering {
self.0 Ord::cmp(&self.0, &cursor_location.max_depth)
.cmp(&cursor_location, buffer)
.then_with(|| { .then_with(|| {
self.1 self.1
.start .start
@ -698,6 +667,37 @@ impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for (Depth, Rang
} }
} }
impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndMaxPosition {
    /// Compares this target against the cursor's summary: first the target
    /// depth against the summary's `max_depth`, and only when those are equal
    /// the target position against the end of the summary's `range`.
    fn cmp(&self, cursor_location: &SyntaxLayerSummary, text: &BufferSnapshot) -> Ordering {
        match Ord::cmp(&self.0, &cursor_location.max_depth) {
            Ordering::Equal => self.1.cmp(&cursor_location.range.end, text),
            unequal => unequal,
        }
    }
}
impl<'a> SeekTarget<'a, SyntaxLayerSummary, SyntaxLayerSummary> for DepthAndRangeOrMaxPosition {
    /// Compares this target against the cursor's summary in three stages:
    ///
    /// 1. The target depth (`self.0`) against the summary's `max_depth`; any
    ///    difference decides the result.
    /// 2. The position bound (`self.2`) against the end of the summary's
    ///    `range`; if the bound lies after it, the target is `Greater`.
    /// 3. Otherwise the target range (`self.1`) against the summary's
    ///    `last_layer_range`: starts in ascending order, then ends with the
    ///    operands swapped.
    fn cmp(&self, cursor_location: &SyntaxLayerSummary, buffer: &BufferSnapshot) -> Ordering {
        let depth_order = Ord::cmp(&self.0, &cursor_location.max_depth);
        if depth_order != Ordering::Equal {
            return depth_order;
        }

        if self.2.cmp(&cursor_location.range.end, buffer) == Ordering::Greater {
            return Ordering::Greater;
        }

        let last_range = &cursor_location.last_layer_range;
        match self.1.start.cmp(&last_range.start, buffer) {
            Ordering::Equal => last_range.end.cmp(&self.1.end, buffer),
            unequal => unequal,
        }
    }
}
impl sum_tree::Item for SyntaxLayer { impl sum_tree::Item for SyntaxLayer {
type Summary = SyntaxLayerSummary; type Summary = SyntaxLayerSummary;