Report response latency and errors when using (inline) assistant (#11806)

Release Notes:

- N/A

Co-authored-by: Nathan <nathan@zed.dev>
Co-authored-by: David <davidsp@anthropic.com>
This commit is contained in:
Antonio Scandurra 2024-05-14 18:18:26 +02:00 committed by GitHub
parent de09409f01
commit 9f0a20241b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 171 additions and 119 deletions

View file

@ -8,6 +8,7 @@ use crate::{
Split, ToggleFocus, ToggleHistory, ToggleIncludeConversation,
};
use anyhow::{anyhow, Result};
use client::telemetry::Telemetry;
use collections::{hash_map, HashMap, HashSet, VecDeque};
use editor::{
actions::{MoveDown, MoveUp},
@ -38,7 +39,15 @@ use parking_lot::Mutex;
use project::Project;
use search::{buffer_search::DivRegistrar, BufferSearchBar};
use settings::Settings;
use std::{cmp, fmt::Write, iter, ops::Range, path::PathBuf, sync::Arc, time::Duration};
use std::{
cmp,
fmt::Write,
iter,
ops::Range,
path::PathBuf,
sync::Arc,
time::{Duration, Instant},
};
use telemetry_events::AssistantKind;
use theme::ThemeSettings;
use ui::{popover_menu, prelude::*, ButtonLike, ContextMenu, Tab, TabBar, Tooltip};
@ -86,6 +95,7 @@ pub struct AssistantPanel {
toolbar: View<Toolbar>,
languages: Arc<LanguageRegistry>,
fs: Arc<dyn Fs>,
telemetry: Arc<Telemetry>,
_subscriptions: Vec<Subscription>,
next_inline_assist_id: usize,
pending_inline_assists: HashMap<usize, PendingInlineAssist>,
@ -179,6 +189,7 @@ impl AssistantPanel {
toolbar,
languages: workspace.app_state().languages.clone(),
fs: workspace.app_state().fs.clone(),
telemetry: workspace.client().telemetry().clone(),
width: None,
height: None,
_subscriptions: subscriptions,
@ -347,9 +358,16 @@ impl AssistantPanel {
};
let inline_assist_id = post_inc(&mut self.next_inline_assist_id);
let telemetry = self.telemetry.clone();
let codegen =
cx.new_model(|cx| Codegen::new(editor.read(cx).buffer().clone(), codegen_kind, cx));
let codegen = cx.new_model(|cx| {
Codegen::new(
editor.read(cx).buffer().clone(),
codegen_kind,
Some(telemetry),
cx,
)
});
let measurements = Arc::new(Mutex::new(BlockMeasurements::default()));
let inline_assistant = cx.new_view(|cx| {
@ -360,7 +378,6 @@ impl AssistantPanel {
show_include_conversation && self.include_conversation_in_next_inline_assist,
self.inline_prompt_history.clone(),
codegen.clone(),
self.workspace.clone(),
cx,
)
});
@ -1039,6 +1056,7 @@ impl AssistantPanel {
let fs = self.fs.clone();
let workspace = self.workspace.clone();
let languages = self.languages.clone();
let telemetry = self.telemetry.clone();
cx.spawn(|this, mut cx| async move {
let saved_conversation = SavedConversation::load(&path, fs.as_ref()).await?;
let model = this.update(&mut cx, |this, _| this.model.clone())?;
@ -1047,6 +1065,7 @@ impl AssistantPanel {
model,
path.clone(),
languages,
Some(telemetry),
&mut cx,
)
.await?;
@ -1345,6 +1364,7 @@ pub struct Conversation {
pending_save: Task<Result<()>>,
path: Option<PathBuf>,
_subscriptions: Vec<Subscription>,
telemetry: Option<Arc<Telemetry>>,
}
#[derive(Default)]
@ -1381,6 +1401,7 @@ impl Conversation {
fn new(
model: LanguageModel,
language_registry: Arc<LanguageRegistry>,
telemetry: Option<Arc<Telemetry>>,
cx: &mut ModelContext<Self>,
) -> Self {
let markdown = language_registry.language_for_name("Markdown");
@ -1415,6 +1436,7 @@ impl Conversation {
pending_save: Task::ready(Ok(())),
path: None,
buffer,
telemetry,
};
let message = MessageAnchor {
@ -1461,6 +1483,7 @@ impl Conversation {
model: LanguageModel,
path: PathBuf,
language_registry: Arc<LanguageRegistry>,
telemetry: Option<Arc<Telemetry>>,
cx: &mut AsyncAppContext,
) -> Result<Model<Self>> {
let id = match saved_conversation.id {
@ -1513,6 +1536,7 @@ impl Conversation {
pending_save: Task::ready(Ok(())),
path: Some(path),
buffer,
telemetry,
};
this.count_remaining_tokens(cx);
this
@ -1806,10 +1830,15 @@ impl Conversation {
let task = cx.spawn({
|this, mut cx| async move {
let assistant_message_id = assistant_message.id;
let mut response_latency = None;
let stream_completion = async {
let request_start = Instant::now();
let mut messages = stream.await?;
while let Some(message) = messages.next().await {
if response_latency.is_none() {
response_latency = Some(request_start.elapsed());
}
let text = message?;
this.update(&mut cx, |this, cx| {
@ -1848,16 +1877,26 @@ impl Conversation {
if let Some(metadata) =
this.messages_metadata.get_mut(&assistant_message.id)
{
match result {
Ok(_) => {
metadata.status = MessageStatus::Done;
}
Err(error) => {
metadata.status = MessageStatus::Error(SharedString::from(
error.to_string().trim().to_string(),
));
}
let error_message = result
.err()
.map(|error| error.to_string().trim().to_string());
if let Some(error_message) = error_message.as_ref() {
metadata.status =
MessageStatus::Error(SharedString::from(error_message.clone()));
} else {
metadata.status = MessageStatus::Done;
}
if let Some(telemetry) = this.telemetry.as_ref() {
telemetry.report_assistant_event(
this.id.clone(),
AssistantKind::Panel,
this.model.telemetry_id(),
response_latency,
error_message,
);
}
cx.emit(ConversationEvent::MessagesEdited);
}
})
@ -2278,7 +2317,9 @@ impl ConversationEditor {
workspace: View<Workspace>,
cx: &mut ViewContext<Self>,
) -> Self {
let conversation = cx.new_model(|cx| Conversation::new(model, language_registry, cx));
let telemetry = workspace.read(cx).client().telemetry().clone();
let conversation =
cx.new_model(|cx| Conversation::new(model, language_registry, Some(telemetry), cx));
Self::for_conversation(conversation, fs, workspace, cx)
}
@ -2318,15 +2359,6 @@ impl ConversationEditor {
}
fn assist(&mut self, _: &Assist, cx: &mut ViewContext<Self>) {
self.conversation.update(cx, |conversation, cx| {
report_assistant_event(
self.workspace.clone(),
Some(conversation),
AssistantKind::Panel,
cx,
)
});
let cursors = self.cursors(cx);
let user_messages = self.conversation.update(cx, |conversation, cx| {
@ -2864,7 +2896,6 @@ enum InlineAssistantEvent {
struct InlineAssistant {
id: usize,
prompt_editor: View<Editor>,
workspace: WeakView<Workspace>,
confirmed: bool,
show_include_conversation: bool,
include_conversation: bool,
@ -2945,7 +2976,6 @@ impl InlineAssistant {
include_conversation: bool,
prompt_history: VecDeque<String>,
codegen: Model<Codegen>,
workspace: WeakView<Workspace>,
cx: &mut ViewContext<Self>,
) -> Self {
let prompt_editor = cx.new_view(|cx| {
@ -2967,7 +2997,6 @@ impl InlineAssistant {
Self {
id,
prompt_editor,
workspace,
confirmed: false,
show_include_conversation,
include_conversation,
@ -3016,8 +3045,6 @@ impl InlineAssistant {
if self.confirmed {
cx.emit(InlineAssistantEvent::Dismissed);
} else {
report_assistant_event(self.workspace.clone(), None, AssistantKind::Inline, cx);
let prompt = self.prompt_editor.read(cx).text(cx);
self.prompt_editor
.update(cx, |editor, _cx| editor.set_read_only(true));
@ -3147,30 +3174,6 @@ fn merge_ranges(ranges: &mut Vec<Range<Anchor>>, buffer: &MultiBufferSnapshot) {
}
}
fn report_assistant_event(
workspace: WeakView<Workspace>,
conversation: Option<&Conversation>,
assistant_kind: AssistantKind,
cx: &mut AppContext,
) {
let Some(workspace) = workspace.upgrade() else {
return;
};
let client = workspace.read(cx).project().read(cx).client();
let telemetry = client.telemetry();
let conversation_id = conversation.and_then(|conversation| conversation.id.clone());
let model_id = conversation
.map(|c| c.model.telemetry_id())
.unwrap_or_else(|| {
CompletionProvider::global(cx)
.default_model()
.telemetry_id()
});
telemetry.report_assistant_event(conversation_id, assistant_kind, model_id)
}
#[cfg(test)]
mod tests {
use super::*;
@ -3187,7 +3190,7 @@ mod tests {
let registry = Arc::new(LanguageRegistry::test(cx.background_executor().clone()));
let conversation =
cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, cx));
cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, None, cx));
let buffer = conversation.read(cx).buffer.clone();
let message_1 = conversation.read(cx).message_anchors[0].clone();
@ -3319,7 +3322,7 @@ mod tests {
let registry = Arc::new(LanguageRegistry::test(cx.background_executor().clone()));
let conversation =
cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, cx));
cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, None, cx));
let buffer = conversation.read(cx).buffer.clone();
let message_1 = conversation.read(cx).message_anchors[0].clone();
@ -3418,7 +3421,7 @@ mod tests {
init(cx);
let registry = Arc::new(LanguageRegistry::test(cx.background_executor().clone()));
let conversation =
cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, cx));
cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, None, cx));
let buffer = conversation.read(cx).buffer.clone();
let message_1 = conversation.read(cx).message_anchors[0].clone();
@ -3502,8 +3505,9 @@ mod tests {
cx.set_global(CompletionProvider::Fake(FakeCompletionProvider::default()));
cx.update(init);
let registry = Arc::new(LanguageRegistry::test(cx.executor()));
let conversation =
cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry.clone(), cx));
let conversation = cx.new_model(|cx| {
Conversation::new(LanguageModel::default(), registry.clone(), None, cx)
});
let buffer = conversation.read_with(cx, |conversation, _| conversation.buffer.clone());
let message_0 =
conversation.read_with(cx, |conversation, _| conversation.message_anchors[0].id);
@ -3542,6 +3546,7 @@ mod tests {
LanguageModel::default(),
Default::default(),
registry.clone(),
None,
&mut cx.to_async(),
)
.await

View file

@ -3,12 +3,13 @@ use crate::{
CompletionProvider, LanguageModelRequest,
};
use anyhow::Result;
use client::telemetry::Telemetry;
use editor::{Anchor, MultiBuffer, MultiBufferSnapshot, ToOffset, ToPoint};
use futures::{channel::mpsc, SinkExt, Stream, StreamExt};
use gpui::{EventEmitter, Model, ModelContext, Task};
use language::{Rope, TransactionId};
use multi_buffer::MultiBufferRow;
use std::{cmp, future, ops::Range};
use std::{cmp, future, ops::Range, sync::Arc, time::Instant};
pub enum Event {
Finished,
@ -30,13 +31,19 @@ pub struct Codegen {
error: Option<anyhow::Error>,
generation: Task<()>,
idle: bool,
telemetry: Option<Arc<Telemetry>>,
_subscription: gpui::Subscription,
}
impl EventEmitter<Event> for Codegen {}
impl Codegen {
pub fn new(buffer: Model<MultiBuffer>, kind: CodegenKind, cx: &mut ModelContext<Self>) -> Self {
pub fn new(
buffer: Model<MultiBuffer>,
kind: CodegenKind,
telemetry: Option<Arc<Telemetry>>,
cx: &mut ModelContext<Self>,
) -> Self {
let snapshot = buffer.read(cx).snapshot(cx);
Self {
buffer: buffer.clone(),
@ -47,6 +54,7 @@ impl Codegen {
error: Default::default(),
idle: true,
generation: Task::ready(()),
telemetry,
_subscription: cx.subscribe(&buffer, Self::handle_buffer_event),
}
}
@ -103,7 +111,9 @@ impl Codegen {
.next()
.unwrap_or_else(|| snapshot.indent_size_for_line(MultiBufferRow(selection_start.row)));
let model_telemetry_id = prompt.model.telemetry_id();
let response = CompletionProvider::global(cx).complete(prompt);
let telemetry = self.telemetry.clone();
self.generation = cx.spawn(|this, mut cx| {
async move {
let generate = async {
@ -111,68 +121,89 @@ impl Codegen {
let (mut hunks_tx, mut hunks_rx) = mpsc::channel(1);
let diff = cx.background_executor().spawn(async move {
let chunks = strip_invalid_spans_from_codeblock(response.await?);
futures::pin_mut!(chunks);
let mut diff = StreamingDiff::new(selected_text.to_string());
let mut response_latency = None;
let request_start = Instant::now();
let diff = async {
let chunks = strip_invalid_spans_from_codeblock(response.await?);
futures::pin_mut!(chunks);
let mut diff = StreamingDiff::new(selected_text.to_string());
let mut new_text = String::new();
let mut base_indent = None;
let mut line_indent = None;
let mut first_line = true;
let mut new_text = String::new();
let mut base_indent = None;
let mut line_indent = None;
let mut first_line = true;
while let Some(chunk) = chunks.next().await {
let chunk = chunk?;
while let Some(chunk) = chunks.next().await {
if response_latency.is_none() {
response_latency = Some(request_start.elapsed());
}
let chunk = chunk?;
let mut lines = chunk.split('\n').peekable();
while let Some(line) = lines.next() {
new_text.push_str(line);
if line_indent.is_none() {
if let Some(non_whitespace_ch_ix) =
new_text.find(|ch: char| !ch.is_whitespace())
{
line_indent = Some(non_whitespace_ch_ix);
base_indent = base_indent.or(line_indent);
let mut lines = chunk.split('\n').peekable();
while let Some(line) = lines.next() {
new_text.push_str(line);
if line_indent.is_none() {
if let Some(non_whitespace_ch_ix) =
new_text.find(|ch: char| !ch.is_whitespace())
{
line_indent = Some(non_whitespace_ch_ix);
base_indent = base_indent.or(line_indent);
let line_indent = line_indent.unwrap();
let base_indent = base_indent.unwrap();
let indent_delta = line_indent as i32 - base_indent as i32;
let mut corrected_indent_len = cmp::max(
0,
suggested_line_indent.len as i32 + indent_delta,
)
as usize;
if first_line {
corrected_indent_len = corrected_indent_len
.saturating_sub(selection_start.column as usize);
let line_indent = line_indent.unwrap();
let base_indent = base_indent.unwrap();
let indent_delta =
line_indent as i32 - base_indent as i32;
let mut corrected_indent_len = cmp::max(
0,
suggested_line_indent.len as i32 + indent_delta,
)
as usize;
if first_line {
corrected_indent_len = corrected_indent_len
.saturating_sub(
selection_start.column as usize,
);
}
let indent_char = suggested_line_indent.char();
let mut indent_buffer = [0; 4];
let indent_str =
indent_char.encode_utf8(&mut indent_buffer);
new_text.replace_range(
..line_indent,
&indent_str.repeat(corrected_indent_len),
);
}
}
let indent_char = suggested_line_indent.char();
let mut indent_buffer = [0; 4];
let indent_str =
indent_char.encode_utf8(&mut indent_buffer);
new_text.replace_range(
..line_indent,
&indent_str.repeat(corrected_indent_len),
);
if line_indent.is_some() {
hunks_tx.send(diff.push_new(&new_text)).await?;
new_text.clear();
}
if lines.peek().is_some() {
hunks_tx.send(diff.push_new("\n")).await?;
line_indent = None;
first_line = false;
}
}
if line_indent.is_some() {
hunks_tx.send(diff.push_new(&new_text)).await?;
new_text.clear();
}
if lines.peek().is_some() {
hunks_tx.send(diff.push_new("\n")).await?;
line_indent = None;
first_line = false;
}
}
}
hunks_tx.send(diff.push_new(&new_text)).await?;
hunks_tx.send(diff.finish()).await?;
hunks_tx.send(diff.push_new(&new_text)).await?;
hunks_tx.send(diff.finish()).await?;
anyhow::Ok(())
anyhow::Ok(())
};
let error_message = diff.await.err().map(|error| error.to_string());
if let Some(telemetry) = telemetry {
telemetry.report_assistant_event(
None,
telemetry_events::AssistantKind::Inline,
model_telemetry_id,
response_latency,
error_message,
);
}
});
while let Some(hunks) = hunks_rx.next().await {
@ -235,7 +266,8 @@ impl Codegen {
})?;
}
diff.await?;
diff.await;
anyhow::Ok(())
};
@ -396,8 +428,9 @@ mod tests {
let snapshot = buffer.snapshot(cx);
snapshot.anchor_before(Point::new(1, 0))..snapshot.anchor_after(Point::new(4, 5))
});
let codegen =
cx.new_model(|cx| Codegen::new(buffer.clone(), CodegenKind::Transform { range }, cx));
let codegen = cx.new_model(|cx| {
Codegen::new(buffer.clone(), CodegenKind::Transform { range }, None, cx)
});
let request = LanguageModelRequest::default();
codegen.update(cx, |codegen, cx| codegen.start(request, cx));
@ -454,8 +487,9 @@ mod tests {
let snapshot = buffer.snapshot(cx);
snapshot.anchor_before(Point::new(1, 6))
});
let codegen =
cx.new_model(|cx| Codegen::new(buffer.clone(), CodegenKind::Generate { position }, cx));
let codegen = cx.new_model(|cx| {
Codegen::new(buffer.clone(), CodegenKind::Generate { position }, None, cx)
});
let request = LanguageModelRequest::default();
codegen.update(cx, |codegen, cx| codegen.start(request, cx));
@ -512,8 +546,9 @@ mod tests {
let snapshot = buffer.snapshot(cx);
snapshot.anchor_before(Point::new(1, 2))
});
let codegen =
cx.new_model(|cx| Codegen::new(buffer.clone(), CodegenKind::Generate { position }, cx));
let codegen = cx.new_model(|cx| {
Codegen::new(buffer.clone(), CodegenKind::Generate { position }, None, cx)
});
let request = LanguageModelRequest::default();
codegen.update(cx, |codegen, cx| codegen.start(request, cx));

View file

@ -261,11 +261,15 @@ impl Telemetry {
conversation_id: Option<String>,
kind: AssistantKind,
model: String,
response_latency: Option<Duration>,
error_message: Option<String>,
) {
let event = Event::Assistant(AssistantEvent {
conversation_id,
kind,
model: model.to_string(),
response_latency,
error_message,
});
self.report_event(event)

View file

@ -785,6 +785,8 @@ pub struct AssistantEventRow {
conversation_id: String,
kind: String,
model: String,
response_latency_in_ms: Option<i64>,
error_message: Option<String>,
}
impl AssistantEventRow {
@ -811,6 +813,10 @@ impl AssistantEventRow {
conversation_id: event.conversation_id.unwrap_or_default(),
kind: event.kind.to_string(),
model: event.model,
response_latency_in_ms: event
.response_latency
.map(|latency| latency.as_millis() as i64),
error_message: event.error_message,
}
}
}

View file

@ -1,6 +1,6 @@
use semantic_version::SemanticVersion;
use serde::{Deserialize, Serialize};
use std::{fmt::Display, sync::Arc};
use std::{fmt::Display, sync::Arc, time::Duration};
#[derive(Serialize, Deserialize, Debug)]
pub struct EventRequestBody {
@ -93,6 +93,8 @@ pub struct AssistantEvent {
pub conversation_id: Option<String>,
pub kind: AssistantKind,
pub model: String,
pub response_latency: Option<Duration>,
pub error_message: Option<String>,
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]