Report response latency and errors when using (inline) assistant (#11806)

Release Notes: - N/A Co-authored-by: Nathan <nathan@zed.dev> Co-authored-by: David <davidsp@anthropic.com>
2024-11-28 17:58:42 +00:00 · 2024-05-14 18:18:26 +02:00 · 2024-05-14 18:18:26 +02:00 · 9f0a20241b
commit 9f0a20241b
parent de09409f01
5 changed files with 171 additions and 119 deletions
--- a/crates/assistant/src/assistant_panel.rs
+++ b/crates/assistant/src/assistant_panel.rs
@ -8,6 +8,7 @@ use crate::{
    Split, ToggleFocus, ToggleHistory, ToggleIncludeConversation,
 };
 use anyhow::{anyhow, Result};
+use client::telemetry::Telemetry;
 use collections::{hash_map, HashMap, HashSet, VecDeque};
 use editor::{
    actions::{MoveDown, MoveUp},
@ -38,7 +39,15 @@ use parking_lot::Mutex;
 use project::Project;
 use search::{buffer_search::DivRegistrar, BufferSearchBar};
 use settings::Settings;
-use std::{cmp, fmt::Write, iter, ops::Range, path::PathBuf, sync::Arc, time::Duration};
+use std::{
+    cmp,
+    fmt::Write,
+    iter,
+    ops::Range,
+    path::PathBuf,
+    sync::Arc,
+    time::{Duration, Instant},
+};
 use telemetry_events::AssistantKind;
 use theme::ThemeSettings;
 use ui::{popover_menu, prelude::*, ButtonLike, ContextMenu, Tab, TabBar, Tooltip};
@ -86,6 +95,7 @@ pub struct AssistantPanel {
    toolbar: View<Toolbar>,
    languages: Arc<LanguageRegistry>,
    fs: Arc<dyn Fs>,
+    telemetry: Arc<Telemetry>,
    _subscriptions: Vec<Subscription>,
    next_inline_assist_id: usize,
    pending_inline_assists: HashMap<usize, PendingInlineAssist>,
@ -179,6 +189,7 @@ impl AssistantPanel {
                        toolbar,
                        languages: workspace.app_state().languages.clone(),
                        fs: workspace.app_state().fs.clone(),
+                        telemetry: workspace.client().telemetry().clone(),
                        width: None,
                        height: None,
                        _subscriptions: subscriptions,
@ -347,9 +358,16 @@ impl AssistantPanel {
        };

        let inline_assist_id = post_inc(&mut self.next_inline_assist_id);
+        let telemetry = self.telemetry.clone();

-        let codegen =
-            cx.new_model(|cx| Codegen::new(editor.read(cx).buffer().clone(), codegen_kind, cx));
+        let codegen = cx.new_model(|cx| {
+            Codegen::new(
+                editor.read(cx).buffer().clone(),
+                codegen_kind,
+                Some(telemetry),
+                cx,
+            )
+        });

        let measurements = Arc::new(Mutex::new(BlockMeasurements::default()));
        let inline_assistant = cx.new_view(|cx| {
@ -360,7 +378,6 @@ impl AssistantPanel {
                show_include_conversation && self.include_conversation_in_next_inline_assist,
                self.inline_prompt_history.clone(),
                codegen.clone(),
-                self.workspace.clone(),
                cx,
            )
        });
@ -1039,6 +1056,7 @@ impl AssistantPanel {
        let fs = self.fs.clone();
        let workspace = self.workspace.clone();
        let languages = self.languages.clone();
+        let telemetry = self.telemetry.clone();
        cx.spawn(|this, mut cx| async move {
            let saved_conversation = SavedConversation::load(&path, fs.as_ref()).await?;
            let model = this.update(&mut cx, |this, _| this.model.clone())?;
@ -1047,6 +1065,7 @@ impl AssistantPanel {
                model,
                path.clone(),
                languages,
+                Some(telemetry),
                &mut cx,
            )
            .await?;
@ -1345,6 +1364,7 @@ pub struct Conversation {
    pending_save: Task<Result<()>>,
    path: Option<PathBuf>,
    _subscriptions: Vec<Subscription>,
+    telemetry: Option<Arc<Telemetry>>,
 }

 #[derive(Default)]
@ -1381,6 +1401,7 @@ impl Conversation {
    fn new(
        model: LanguageModel,
        language_registry: Arc<LanguageRegistry>,
+        telemetry: Option<Arc<Telemetry>>,
        cx: &mut ModelContext<Self>,
    ) -> Self {
        let markdown = language_registry.language_for_name("Markdown");
@ -1415,6 +1436,7 @@ impl Conversation {
            pending_save: Task::ready(Ok(())),
            path: None,
            buffer,
+            telemetry,
        };

        let message = MessageAnchor {
@ -1461,6 +1483,7 @@ impl Conversation {
        model: LanguageModel,
        path: PathBuf,
        language_registry: Arc<LanguageRegistry>,
+        telemetry: Option<Arc<Telemetry>>,
        cx: &mut AsyncAppContext,
    ) -> Result<Model<Self>> {
        let id = match saved_conversation.id {
@ -1513,6 +1536,7 @@ impl Conversation {
                pending_save: Task::ready(Ok(())),
                path: Some(path),
                buffer,
+                telemetry,
            };
            this.count_remaining_tokens(cx);
            this
@ -1806,10 +1830,15 @@ impl Conversation {
            let task = cx.spawn({
                |this, mut cx| async move {
                    let assistant_message_id = assistant_message.id;
+                    let mut response_latency = None;
                    let stream_completion = async {
+                        let request_start = Instant::now();
                        let mut messages = stream.await?;

                        while let Some(message) = messages.next().await {
+                            if response_latency.is_none() {
+                                response_latency = Some(request_start.elapsed());
+                            }
                            let text = message?;

                            this.update(&mut cx, |this, cx| {
@ -1848,16 +1877,26 @@ impl Conversation {
                        if let Some(metadata) =
                            this.messages_metadata.get_mut(&assistant_message.id)
                        {
-                            match result {
-                                Ok(_) => {
-                                    metadata.status = MessageStatus::Done;
-                                }
-                                Err(error) => {
-                                    metadata.status = MessageStatus::Error(SharedString::from(
-                                        error.to_string().trim().to_string(),
-                                    ));
-                                }
+                            let error_message = result
+                                .err()
+                                .map(|error| error.to_string().trim().to_string());
+                            if let Some(error_message) = error_message.as_ref() {
+                                metadata.status =
+                                    MessageStatus::Error(SharedString::from(error_message.clone()));
+                            } else {
+                                metadata.status = MessageStatus::Done;
                            }
+
+                            if let Some(telemetry) = this.telemetry.as_ref() {
+                                telemetry.report_assistant_event(
+                                    this.id.clone(),
+                                    AssistantKind::Panel,
+                                    this.model.telemetry_id(),
+                                    response_latency,
+                                    error_message,
+                                );
+                            }
+
                            cx.emit(ConversationEvent::MessagesEdited);
                        }
                    })
@ -2278,7 +2317,9 @@ impl ConversationEditor {
        workspace: View<Workspace>,
        cx: &mut ViewContext<Self>,
    ) -> Self {
-        let conversation = cx.new_model(|cx| Conversation::new(model, language_registry, cx));
+        let telemetry = workspace.read(cx).client().telemetry().clone();
+        let conversation =
+            cx.new_model(|cx| Conversation::new(model, language_registry, Some(telemetry), cx));
        Self::for_conversation(conversation, fs, workspace, cx)
    }

@ -2318,15 +2359,6 @@ impl ConversationEditor {
    }

    fn assist(&mut self, _: &Assist, cx: &mut ViewContext<Self>) {
-        self.conversation.update(cx, |conversation, cx| {
-            report_assistant_event(
-                self.workspace.clone(),
-                Some(conversation),
-                AssistantKind::Panel,
-                cx,
-            )
-        });
-
        let cursors = self.cursors(cx);

        let user_messages = self.conversation.update(cx, |conversation, cx| {
@ -2864,7 +2896,6 @@ enum InlineAssistantEvent {
 struct InlineAssistant {
    id: usize,
    prompt_editor: View<Editor>,
-    workspace: WeakView<Workspace>,
    confirmed: bool,
    show_include_conversation: bool,
    include_conversation: bool,
@ -2945,7 +2976,6 @@ impl InlineAssistant {
        include_conversation: bool,
        prompt_history: VecDeque<String>,
        codegen: Model<Codegen>,
-        workspace: WeakView<Workspace>,
        cx: &mut ViewContext<Self>,
    ) -> Self {
        let prompt_editor = cx.new_view(|cx| {
@ -2967,7 +2997,6 @@ impl InlineAssistant {
        Self {
            id,
            prompt_editor,
-            workspace,
            confirmed: false,
            show_include_conversation,
            include_conversation,
@ -3016,8 +3045,6 @@ impl InlineAssistant {
        if self.confirmed {
            cx.emit(InlineAssistantEvent::Dismissed);
        } else {
-            report_assistant_event(self.workspace.clone(), None, AssistantKind::Inline, cx);
-
            let prompt = self.prompt_editor.read(cx).text(cx);
            self.prompt_editor
                .update(cx, |editor, _cx| editor.set_read_only(true));
@ -3147,30 +3174,6 @@ fn merge_ranges(ranges: &mut Vec<Range<Anchor>>, buffer: &MultiBufferSnapshot) {
    }
 }

-fn report_assistant_event(
-    workspace: WeakView<Workspace>,
-    conversation: Option<&Conversation>,
-    assistant_kind: AssistantKind,
-    cx: &mut AppContext,
-) {
-    let Some(workspace) = workspace.upgrade() else {
-        return;
-    };
-
-    let client = workspace.read(cx).project().read(cx).client();
-    let telemetry = client.telemetry();
-
-    let conversation_id = conversation.and_then(|conversation| conversation.id.clone());
-    let model_id = conversation
-        .map(|c| c.model.telemetry_id())
-        .unwrap_or_else(|| {
-            CompletionProvider::global(cx)
-                .default_model()
-                .telemetry_id()
-        });
-    telemetry.report_assistant_event(conversation_id, assistant_kind, model_id)
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
@ -3187,7 +3190,7 @@ mod tests {
        let registry = Arc::new(LanguageRegistry::test(cx.background_executor().clone()));

        let conversation =
-            cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, cx));
+            cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, None, cx));
        let buffer = conversation.read(cx).buffer.clone();

        let message_1 = conversation.read(cx).message_anchors[0].clone();
@ -3319,7 +3322,7 @@ mod tests {
        let registry = Arc::new(LanguageRegistry::test(cx.background_executor().clone()));

        let conversation =
-            cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, cx));
+            cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, None, cx));
        let buffer = conversation.read(cx).buffer.clone();

        let message_1 = conversation.read(cx).message_anchors[0].clone();
@ -3418,7 +3421,7 @@ mod tests {
        init(cx);
        let registry = Arc::new(LanguageRegistry::test(cx.background_executor().clone()));
        let conversation =
-            cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, cx));
+            cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry, None, cx));
        let buffer = conversation.read(cx).buffer.clone();

        let message_1 = conversation.read(cx).message_anchors[0].clone();
@ -3502,8 +3505,9 @@ mod tests {
        cx.set_global(CompletionProvider::Fake(FakeCompletionProvider::default()));
        cx.update(init);
        let registry = Arc::new(LanguageRegistry::test(cx.executor()));
-        let conversation =
-            cx.new_model(|cx| Conversation::new(LanguageModel::default(), registry.clone(), cx));
+        let conversation = cx.new_model(|cx| {
+            Conversation::new(LanguageModel::default(), registry.clone(), None, cx)
+        });
        let buffer = conversation.read_with(cx, |conversation, _| conversation.buffer.clone());
        let message_0 =
            conversation.read_with(cx, |conversation, _| conversation.message_anchors[0].id);
@ -3542,6 +3546,7 @@ mod tests {
            LanguageModel::default(),
            Default::default(),
            registry.clone(),
+            None,
            &mut cx.to_async(),
        )
        .await
--- a/crates/assistant/src/codegen.rs
+++ b/crates/assistant/src/codegen.rs
@ -3,12 +3,13 @@ use crate::{
    CompletionProvider, LanguageModelRequest,
 };
 use anyhow::Result;
+use client::telemetry::Telemetry;
 use editor::{Anchor, MultiBuffer, MultiBufferSnapshot, ToOffset, ToPoint};
 use futures::{channel::mpsc, SinkExt, Stream, StreamExt};
 use gpui::{EventEmitter, Model, ModelContext, Task};
 use language::{Rope, TransactionId};
 use multi_buffer::MultiBufferRow;
-use std::{cmp, future, ops::Range};
+use std::{cmp, future, ops::Range, sync::Arc, time::Instant};

 pub enum Event {
    Finished,
@ -30,13 +31,19 @@ pub struct Codegen {
    error: Option<anyhow::Error>,
    generation: Task<()>,
    idle: bool,
+    telemetry: Option<Arc<Telemetry>>,
    _subscription: gpui::Subscription,
 }

 impl EventEmitter<Event> for Codegen {}

 impl Codegen {
-    pub fn new(buffer: Model<MultiBuffer>, kind: CodegenKind, cx: &mut ModelContext<Self>) -> Self {
+    pub fn new(
+        buffer: Model<MultiBuffer>,
+        kind: CodegenKind,
+        telemetry: Option<Arc<Telemetry>>,
+        cx: &mut ModelContext<Self>,
+    ) -> Self {
        let snapshot = buffer.read(cx).snapshot(cx);
        Self {
            buffer: buffer.clone(),
@ -47,6 +54,7 @@ impl Codegen {
            error: Default::default(),
            idle: true,
            generation: Task::ready(()),
+            telemetry,
            _subscription: cx.subscribe(&buffer, Self::handle_buffer_event),
        }
    }
@ -103,7 +111,9 @@ impl Codegen {
            .next()
            .unwrap_or_else(|| snapshot.indent_size_for_line(MultiBufferRow(selection_start.row)));

+        let model_telemetry_id = prompt.model.telemetry_id();
        let response = CompletionProvider::global(cx).complete(prompt);
+        let telemetry = self.telemetry.clone();
        self.generation = cx.spawn(|this, mut cx| {
            async move {
                let generate = async {
@ -111,68 +121,89 @@ impl Codegen {

                    let (mut hunks_tx, mut hunks_rx) = mpsc::channel(1);
                    let diff = cx.background_executor().spawn(async move {
-                        let chunks = strip_invalid_spans_from_codeblock(response.await?);
-                        futures::pin_mut!(chunks);
-                        let mut diff = StreamingDiff::new(selected_text.to_string());
+                        let mut response_latency = None;
+                        let request_start = Instant::now();
+                        let diff = async {
+                            let chunks = strip_invalid_spans_from_codeblock(response.await?);
+                            futures::pin_mut!(chunks);
+                            let mut diff = StreamingDiff::new(selected_text.to_string());

-                        let mut new_text = String::new();
-                        let mut base_indent = None;
-                        let mut line_indent = None;
-                        let mut first_line = true;
+                            let mut new_text = String::new();
+                            let mut base_indent = None;
+                            let mut line_indent = None;
+                            let mut first_line = true;

-                        while let Some(chunk) = chunks.next().await {
-                            let chunk = chunk?;
+                            while let Some(chunk) = chunks.next().await {
+                                if response_latency.is_none() {
+                                    response_latency = Some(request_start.elapsed());
+                                }
+                                let chunk = chunk?;

-                            let mut lines = chunk.split('\n').peekable();
-                            while let Some(line) = lines.next() {
-                                new_text.push_str(line);
-                                if line_indent.is_none() {
-                                    if let Some(non_whitespace_ch_ix) =
-                                        new_text.find(|ch: char| !ch.is_whitespace())
-                                    {
-                                        line_indent = Some(non_whitespace_ch_ix);
-                                        base_indent = base_indent.or(line_indent);
+                                let mut lines = chunk.split('\n').peekable();
+                                while let Some(line) = lines.next() {
+                                    new_text.push_str(line);
+                                    if line_indent.is_none() {
+                                        if let Some(non_whitespace_ch_ix) =
+                                            new_text.find(|ch: char| !ch.is_whitespace())
+                                        {
+                                            line_indent = Some(non_whitespace_ch_ix);
+                                            base_indent = base_indent.or(line_indent);

-                                        let line_indent = line_indent.unwrap();
-                                        let base_indent = base_indent.unwrap();
-                                        let indent_delta = line_indent as i32 - base_indent as i32;
-                                        let mut corrected_indent_len = cmp::max(
-                                            0,
-                                            suggested_line_indent.len as i32 + indent_delta,
-                                        )
-                                            as usize;
-                                        if first_line {
-                                            corrected_indent_len = corrected_indent_len
-                                                .saturating_sub(selection_start.column as usize);
+                                            let line_indent = line_indent.unwrap();
+                                            let base_indent = base_indent.unwrap();
+                                            let indent_delta =
+                                                line_indent as i32 - base_indent as i32;
+                                            let mut corrected_indent_len = cmp::max(
+                                                0,
+                                                suggested_line_indent.len as i32 + indent_delta,
+                                            )
+                                                as usize;
+                                            if first_line {
+                                                corrected_indent_len = corrected_indent_len
+                                                    .saturating_sub(
+                                                        selection_start.column as usize,
+                                                    );
+                                            }
+
+                                            let indent_char = suggested_line_indent.char();
+                                            let mut indent_buffer = [0; 4];
+                                            let indent_str =
+                                                indent_char.encode_utf8(&mut indent_buffer);
+                                            new_text.replace_range(
+                                                ..line_indent,
+                                                &indent_str.repeat(corrected_indent_len),
+                                            );
                                        }
+                                    }

-                                        let indent_char = suggested_line_indent.char();
-                                        let mut indent_buffer = [0; 4];
-                                        let indent_str =
-                                            indent_char.encode_utf8(&mut indent_buffer);
-                                        new_text.replace_range(
-                                            ..line_indent,
-                                            &indent_str.repeat(corrected_indent_len),
-                                        );
+                                    if line_indent.is_some() {
+                                        hunks_tx.send(diff.push_new(&new_text)).await?;
+                                        new_text.clear();
+                                    }
+
+                                    if lines.peek().is_some() {
+                                        hunks_tx.send(diff.push_new("\n")).await?;
+                                        line_indent = None;
+                                        first_line = false;
                                    }
                                }
-
-                                if line_indent.is_some() {
-                                    hunks_tx.send(diff.push_new(&new_text)).await?;
-                                    new_text.clear();
-                                }
-
-                                if lines.peek().is_some() {
-                                    hunks_tx.send(diff.push_new("\n")).await?;
-                                    line_indent = None;
-                                    first_line = false;
-                                }
                            }
-                        }
-                        hunks_tx.send(diff.push_new(&new_text)).await?;
-                        hunks_tx.send(diff.finish()).await?;
+                            hunks_tx.send(diff.push_new(&new_text)).await?;
+                            hunks_tx.send(diff.finish()).await?;

-                        anyhow::Ok(())
+                            anyhow::Ok(())
+                        };
+
+                        let error_message = diff.await.err().map(|error| error.to_string());
+                        if let Some(telemetry) = telemetry {
+                            telemetry.report_assistant_event(
+                                None,
+                                telemetry_events::AssistantKind::Inline,
+                                model_telemetry_id,
+                                response_latency,
+                                error_message,
+                            );
+                        }
                    });

                    while let Some(hunks) = hunks_rx.next().await {
@ -235,7 +266,8 @@ impl Codegen {
                        })?;
                    }

-                    diff.await?;
+                    diff.await;
+
                    anyhow::Ok(())
                };

@ -396,8 +428,9 @@ mod tests {
            let snapshot = buffer.snapshot(cx);
            snapshot.anchor_before(Point::new(1, 0))..snapshot.anchor_after(Point::new(4, 5))
        });
-        let codegen =
-            cx.new_model(|cx| Codegen::new(buffer.clone(), CodegenKind::Transform { range }, cx));
+        let codegen = cx.new_model(|cx| {
+            Codegen::new(buffer.clone(), CodegenKind::Transform { range }, None, cx)
+        });

        let request = LanguageModelRequest::default();
        codegen.update(cx, |codegen, cx| codegen.start(request, cx));
@ -454,8 +487,9 @@ mod tests {
            let snapshot = buffer.snapshot(cx);
            snapshot.anchor_before(Point::new(1, 6))
        });
-        let codegen =
-            cx.new_model(|cx| Codegen::new(buffer.clone(), CodegenKind::Generate { position }, cx));
+        let codegen = cx.new_model(|cx| {
+            Codegen::new(buffer.clone(), CodegenKind::Generate { position }, None, cx)
+        });

        let request = LanguageModelRequest::default();
        codegen.update(cx, |codegen, cx| codegen.start(request, cx));
@ -512,8 +546,9 @@ mod tests {
            let snapshot = buffer.snapshot(cx);
            snapshot.anchor_before(Point::new(1, 2))
        });
-        let codegen =
-            cx.new_model(|cx| Codegen::new(buffer.clone(), CodegenKind::Generate { position }, cx));
+        let codegen = cx.new_model(|cx| {
+            Codegen::new(buffer.clone(), CodegenKind::Generate { position }, None, cx)
+        });

        let request = LanguageModelRequest::default();
        codegen.update(cx, |codegen, cx| codegen.start(request, cx));
--- a/crates/client/src/telemetry.rs
+++ b/crates/client/src/telemetry.rs
@ -261,11 +261,15 @@ impl Telemetry {
        conversation_id: Option<String>,
        kind: AssistantKind,
        model: String,
+        response_latency: Option<Duration>,
+        error_message: Option<String>,
    ) {
        let event = Event::Assistant(AssistantEvent {
            conversation_id,
            kind,
            model: model.to_string(),
+            response_latency,
+            error_message,
        });

        self.report_event(event)
--- a/crates/collab/src/api/events.rs
+++ b/crates/collab/src/api/events.rs
@ -785,6 +785,8 @@ pub struct AssistantEventRow {
    conversation_id: String,
    kind: String,
    model: String,
+    response_latency_in_ms: Option<i64>,
+    error_message: Option<String>,
 }

 impl AssistantEventRow {
@ -811,6 +813,10 @@ impl AssistantEventRow {
            conversation_id: event.conversation_id.unwrap_or_default(),
            kind: event.kind.to_string(),
            model: event.model,
+            response_latency_in_ms: event
+                .response_latency
+                .map(|latency| latency.as_millis() as i64),
+            error_message: event.error_message,
        }
    }
 }
--- a/crates/telemetry_events/src/telemetry_events.rs
+++ b/crates/telemetry_events/src/telemetry_events.rs
@ -1,6 +1,6 @@
 use semantic_version::SemanticVersion;
 use serde::{Deserialize, Serialize};
-use std::{fmt::Display, sync::Arc};
+use std::{fmt::Display, sync::Arc, time::Duration};

 #[derive(Serialize, Deserialize, Debug)]
 pub struct EventRequestBody {
@ -93,6 +93,8 @@ pub struct AssistantEvent {
    pub conversation_id: Option<String>,
    pub kind: AssistantKind,
    pub model: String,
+    pub response_latency: Option<Duration>,
+    pub error_message: Option<String>,
 }

 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]