21 changes: 21 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -79,6 +79,7 @@ log = "0.4"
env_logger = { version = "0.11", optional = true }
chrono = {version = "0.4", default-features = false, features = ["serde"]}
rand = "0.8"
pin-project = "1"

[[bin]]
name = "llm"
32 changes: 32 additions & 0 deletions src/builder.rs
@@ -198,6 +198,8 @@ pub struct LLMBuilder {
resilient_max_delay_ms: Option<u64>,
/// Resilience: jitter toggle
resilient_jitter: Option<bool>,
/// Enable metrics collection (timing, usage) for non-streaming calls
enable_metrics: Option<bool>,
}

impl LLMBuilder {
@@ -476,6 +478,31 @@ impl LLMBuilder {
self
}

/// Enables metrics collection for non-streaming chat calls.
///
/// When enabled, `ChatResponse::metrics()` will return timing and usage
/// information. For streaming calls, use the `Tracked::new()` wrapper instead.
///
/// # Example
///
/// ```rust,ignore
/// let llm = LLMBuilder::new()
/// .backend(LLMBackend::OpenAI)
/// .api_key("...")
/// .enable_metrics(true)
/// .build()?;
///
/// let response = llm.chat("Hello").await?;
/// if let Some(metrics) = response.metrics() {
/// println!("Duration: {:?}", metrics.duration);
/// println!("Tokens/sec: {:?}", metrics.tokens_per_second());
/// }
/// ```
pub fn enable_metrics(mut self, enable: bool) -> Self {
self.enable_metrics = Some(enable);
self
}

#[deprecated(note = "Renamed to `xai_search_mode`.")]
pub fn search_mode(self, mode: impl Into<String>) -> Self {
self.xai_search_mode(mode)
@@ -1112,6 +1139,11 @@ impl LLMBuilder {
final_provider = Box::new(crate::resilient_llm::ResilientLLM::new(final_provider, cfg));
}

// Wrap with metrics collection if enabled
if self.enable_metrics.unwrap_or(false) {
final_provider = Box::new(crate::metrics::MetricsProvider::new(final_provider));
}

// Wrap with memory capabilities if memory is configured
if let Some(memory) = self.memory {
let memory_arc = Arc::new(RwLock::new(memory));
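The actual timing logic lives in `crate::metrics::MetricsProvider` (`src/metrics.rs`), which is not shown in this diff. Below is a minimal, hypothetical sketch of the decorator pattern being wired up here — a wrapper that times the inner provider's call and hands back the elapsed `Duration` — using a simplified synchronous trait rather than the crate's real async one; names other than `MetricsProvider::new` are illustrative assumptions.

```rust
use std::time::{Duration, Instant};

// Simplified stand-in for the crate's provider trait (the real one is async
// and returns a boxed ChatResponse); used here only to illustrate the wrapper.
trait Provider {
    fn chat(&self, prompt: &str) -> String;
}

// Decorator that forwards to the wrapped provider and measures wall-clock time.
struct MetricsProvider<P> {
    inner: P,
}

impl<P: Provider> MetricsProvider<P> {
    fn new(inner: P) -> Self {
        Self { inner }
    }

    // Returns the inner response together with the elapsed duration,
    // mirroring what `ChatMetrics::duration` would report.
    fn chat_timed(&self, prompt: &str) -> (String, Duration) {
        let start = Instant::now();
        let response = self.inner.chat(prompt);
        (response, start.elapsed())
    }
}
```

The real wrapper presumably returns a `ChatResponse` whose `metrics()` method exposes the recorded `ChatMetrics` rather than a bare tuple.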
73 changes: 73 additions & 0 deletions src/chat/mod.rs
@@ -1,6 +1,7 @@
use std::collections::HashMap;
use std::fmt;
use std::pin::Pin;
use std::time::Duration;

use async_trait::async_trait;
use futures::stream::{Stream, StreamExt};
@@ -9,6 +10,9 @@ use serde_json::Value;

use crate::{error::LLMError, ToolCall};

mod tracked;
pub use tracked::{Trackable, Tracked};

/// Usage metadata for a chat response.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Usage {
@@ -127,6 +131,54 @@ pub struct PromptTokensDetails {
pub audio_tokens: Option<u32>,
}

/// Comprehensive metrics for a chat request including timing and token usage.
///
/// This struct is returned by `ChatResponse::metrics()` when metrics collection
/// is enabled, or by `Tracked::finalize()` for streaming responses.
///
/// # Example
///
/// ```rust,ignore
/// // Non-streaming with metrics enabled
/// let response = llm.chat("Hello").await?;
/// if let Some(metrics) = response.metrics() {
/// println!("Duration: {:?}", metrics.duration);
/// println!("Tokens: {:?}", metrics.usage);
/// println!("Tokens/sec: {:?}", metrics.tokens_per_second());
/// }
///
/// // Streaming with Tracked wrapper
/// let stream = llm.chat_stream_with_tools(messages, None).await?;
/// let mut tracked = Tracked::new(stream);
/// while let Some(chunk) = tracked.next().await { /* ... */ }
/// let metrics = tracked.finalize();
/// println!("TTFT: {:?}", metrics.time_to_first_token);
/// ```
#[derive(Debug, Clone, Default)]
pub struct ChatMetrics {
/// Token usage (prompt, completion, total)
pub usage: Option<Usage>,
/// Total wall-clock duration of the request
pub duration: Duration,
/// Time to first token (streaming only, None for non-streaming)
pub time_to_first_token: Option<Duration>,
}

impl ChatMetrics {
/// Calculate tokens per second (completion tokens / duration).
///
/// Returns `None` if usage data is unavailable or duration is zero.
pub fn tokens_per_second(&self) -> Option<f64> {
let usage = self.usage.as_ref()?;
let secs = self.duration.as_secs_f64();
if secs > 0.0 {
Some(usage.completion_tokens as f64 / secs)
} else {
None
}
}
}

/// Role of a participant in a chat conversation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ChatRole {
@@ -356,15 +408,36 @@ impl Serialize for ToolChoice {
}
}

/// Trait for chat response types returned by providers.
///
/// Provides access to the response content, tool calls, usage statistics,
/// and optional metrics when enabled.
pub trait ChatResponse: std::fmt::Debug + std::fmt::Display + Send + Sync {
/// Returns the text content of the response, if any.
fn text(&self) -> Option<String>;

/// Returns tool calls requested by the model, if any.
fn tool_calls(&self) -> Option<Vec<ToolCall>>;

/// Returns the model's thinking/reasoning output, if available.
fn thinking(&self) -> Option<String> {
None
}

/// Returns token usage statistics, if available.
fn usage(&self) -> Option<Usage> {
None
}

/// Returns comprehensive metrics including timing and usage.
///
/// This method returns `Some` only when metrics collection is enabled
/// via `.enable_metrics(true)` on the builder. Otherwise returns `None`.
///
/// For streaming responses, use `Tracked::finalize()` instead.
fn metrics(&self) -> Option<ChatMetrics> {
None
}
}

/// Trait for providers that support chat-style interactions.
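The `Tracked`/`Trackable` types re-exported above live in `src/chat/tracked.rs`, which is not included in this excerpt; the new `pin-project` dependency suggests a pinned stream wrapper. Below is a minimal, hypothetical sketch (names and fields are assumptions, not the PR's actual code) of how such a wrapper could record time-to-first-token while delegating to the inner stream.

```rust
use std::pin::Pin;
use std::task::{Context, Poll};
use std::time::{Duration, Instant};

use futures::stream::Stream;
use pin_project::pin_project;

// Hypothetical stream wrapper; the real `Tracked` type may differ.
#[pin_project]
struct TrackedSketch<S> {
    #[pin]
    inner: S,
    started: Instant,
    first_item_at: Option<Instant>,
}

impl<S> TrackedSketch<S> {
    fn new(inner: S) -> Self {
        Self {
            inner,
            started: Instant::now(),
            first_item_at: None,
        }
    }

    // Time from construction to the first yielded chunk, if one arrived.
    fn time_to_first_token(&self) -> Option<Duration> {
        self.first_item_at.map(|t| t.duration_since(self.started))
    }
}

impl<S: Stream> Stream for TrackedSketch<S> {
    type Item = S::Item;

    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        let this = self.project();
        let poll = this.inner.poll_next(cx);
        // Record the instant the first chunk becomes ready.
        if this.first_item_at.is_none() && matches!(poll, Poll::Ready(Some(_))) {
            *this.first_item_at = Some(Instant::now());
        }
        poll
    }
}
```

The crate's actual implementation presumably also accumulates `Usage` from the streamed chunks so that `finalize()` can return a complete `ChatMetrics`; that bookkeeping is omitted from this sketch.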