From f7998714db4244c77b0c265c0290a5cb44cf4926 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 24 Jan 2026 18:41:32 +0000
Subject: [PATCH 1/7] research: Add notifications primitive design document

Explores a new primitive for injecting external events into agent context:
- Notification structure and types (source, priority, content)
- Transcript integration via NotificationBlock
- Multiple activation modes (passive, prompt, active)
- Mid-stream injection points analysis
- Priority-based injection strategies
- State machine for turn interruption
---
 research/notifications-primitive.md | 897 ++++++++++++++++++++++++++++
 1 file changed, 897 insertions(+)
 create mode 100644 research/notifications-primitive.md
diff --git a/research/notifications-primitive.md b/research/notifications-primitive.md
new file mode 100644
index 0000000..2a39ebf
--- /dev/null
+++ b/research/notifications-primitive.md
@@ -0,0 +1,897 @@
+# Notifications Primitive
+
+## Problem Statement
+
+The current system has a rigid message flow:
+1. User sends a message (terminal input)
+2. Agent responds (streaming + tool calls)
+3. Compaction can inject summaries (special case)
+
+This model breaks down when we want external events to influence the agent:
+- File system changes (watched files modified)
+- Background task completion (sub-agent finished, build completed)
+- IDE events (cursor moved, file opened, diagnostics changed)
+- Timer/scheduled events (reminder, timeout)
+- External webhooks (CI status, PR review)
+
+Currently, these events either:
+- Get ignored entirely
+- Require the user to manually ask about them
+- Use hacky workarounds (fake user messages)
+
+We need a first-class primitive for **Notifications** - events from outside the user-agent conversation that can be injected into context and optionally activate the agent.
+
+---
+
+## Current Architecture Analysis
+
+### Message Flow Today
+
+```
+Terminal Event (user input)
+    ↓
+MessageRequest::User(content, turn_id)
+    ↓
+message_queue.push_back(...)
+    ↓
+handle_message() → agent.send_request()
+    ↓
+Agent streams response
+```
+
+### Context Management
+
+The agent maintains `messages: Vec<ChatMessage>` which maps to LLM API format:
+- `ChatRole::System` - system prompt (first message)
+- `ChatRole::User` - user messages
+- `ChatRole::Assistant` - agent responses (text, tool calls)
+
+The transcript uses `Turn` with `Role::User | Role::Assistant | Role::System` and polymorphic `Block` content.
+
+### Existing "Injection" Patterns
+
+1. **Compaction** (`MessageRequest::Compaction`)
+   - Triggered by token threshold
+   - Agent responds in `RequestMode::Compaction`
+   - Result replaces context via `reset_with_summary()`
+   - Appears as `CompactionBlock` in transcript
+
+2. **IDE Events** (`handle_ide_event()`)
+   - Currently just updates `selected_text` state
+   - No injection into agent context
+   - Agent only sees selection if it requests it
+
+3. **Tool Results**
+   - Injected as tool response messages
+   - Part of the normal request/response flow
+
+---
+
+## Design: Notifications Primitive
+
+### Core Concept
+
+A **Notification** is an event from outside the conversation that:
+1. Has a **source** (what system generated it)
+2. Has a **priority** (how urgent/important)
+3. Has **content** (what happened)
+4. Has an optional **action** (what the agent might do)
+
+Notifications differ from user messages in that they:
+- May not require immediate response
+- Can be batched/coalesced
+- Have semantic meaning (type-based handling)
+- Can be filtered/prioritized
+
+### Notification Structure
+
+```rust
+pub struct Notification {
+    pub id: Uuid,
+    pub source: NotificationSource,
+    pub priority: NotificationPriority,
+    pub content: NotificationContent,
+    pub timestamp: DateTime<Utc>,
+    pub requires_response: bool,
+}
+
+pub enum NotificationSource {
+    FileSystem,       // File watcher events
+    BackgroundTask,   // Sub-agent, build, test completion
+    IDE,              // Editor events (diagnostics, navigation)
+    Timer,            // Scheduled/timeout events
+    External,         // Webhooks, CI, etc.
+    System,           // Internal system events
+}
+
+pub enum NotificationPriority {
+    Low,              // Informational, can be batched
+    Normal,           // Standard priority
+    High,             // Should interrupt current work
+    Critical,         // Must be handled immediately
+}
+
+pub enum NotificationContent {
+    FileChanged { path: PathBuf, change_type: ChangeType },
+    TaskCompleted { task_id: String, result: String },
+    DiagnosticsUpdated { uri: String, diagnostics: Vec<Diagnostic> },
+    TimerFired { name: String },
+    Custom { event_type: String, payload: serde_json::Value },
+}
+```
+
+### Transcript Integration
+
+New block type for notifications:
+
+```rust
+pub struct NotificationBlock {
+    pub notification: Notification,
+    pub status: Status,
+    pub acknowledged: bool,
+}
+
+impl Block for NotificationBlock {
+    fn kind(&self) -> BlockType { BlockType::Notification }
+    // ...
+}
+
+pub enum BlockType {
+    Text,
+    Thinking,
+    Tool,
+    Compaction,
+    Notification,  // NEW
+}
+```
+
+### Message Queue Integration
+
+Extend the message request enum:
+
+```rust
+enum MessageRequest {
+    User(String, usize),
+    Compaction,
+    Command(String, usize),
+    Notification(Notification),  // NEW
+}
+```
+
+### Agent Context Injection
+
+Notifications need to appear in the agent's context. Options:
+
+**Option A: As User Messages (Simple)**
+```rust
+// In agent.send_request() or restore_from_transcript()
+ChatMessage::user(format!(
+    "[NOTIFICATION from {source}]: {content}"
+))
+```
+
+- Pros: Works with existing API, no special handling
+- Cons: Pollutes user message stream, awkward formatting
+
+**Option B: As System Messages (Semantic)**
+```rust
+// Inject after system prompt, before conversation
+ChatMessage::system(format!(
+    "Notification ({source}, {priority}): {content}"
+))
+```
+
+- Pros: Semantically correct, separate from user input
+- Cons: Multiple system messages may confuse models
+
+**Option C: Aggregated Context Block (Recommended)**
+```rust
+// Single "notifications context" injected before user's message
+let notifications_context = format!(
+    "<notifications>\n{}\n</notifications>",
+    pending_notifications.iter()
+        .map(|n| format!("- [{:?}] {}", n.source, n.content))
+        .collect::<Vec<_>>()
+        .join("\n")
+);
+
+// Prepend to user message or inject as separate user message
+ChatMessage::user(notifications_context)
+```
+
+- Pros: Batched, clear delineation, efficient token use
+- Cons: Still a "fake" user message
+
+**Option D: Extended System Prompt (Cleanest)**
+```rust
+// Add to system prompt dynamically
+let dynamic_context = format!(
+    "\n\n## Active Notifications\n{notifications}\n\n\
+     Consider these when responding. Not all require action."
+);
+```
+
+- Pros: Natural integration, doesn't pollute conversation
+- Cons: Requires system prompt regeneration
+
+---
+
+## Activation Modes
+
+Notifications can trigger different behaviors:
+
+### 1. Passive (Accumulate)
+Notifications queue up silently. Agent sees them on next user message.
+
+```
+User: "fix the bug"
+[Notifications silently accumulated]
+Agent: (sees notifications in context) "I notice the file changed..."
+```
+
+### 2. Prompt (Notify User)
+Show notification to user, let them decide to activate agent.
+
+```
+┌─────────────────────────────────────────────────┐
+│ [!] Build completed with 3 warnings             │
+│     Press Enter to discuss, Esc to dismiss      │
+└─────────────────────────────────────────────────┘
+```
+
+### 3. Active (Auto-Activate)
+High-priority notifications automatically trigger agent response.
+
+```
+[Critical: Tests failing after file save]
+    ↓
+Agent automatically activates
+    ↓
+"I see the tests are now failing. Let me investigate..."
+```
+
+### Configuration
+
+```toml
+[notifications]
+# Global enable/disable
+enabled = true
+
+# Per-source configuration
+[notifications.file_system]
+enabled = true
+activation = "passive"
+debounce_ms = 500
+
+[notifications.background_task]
+enabled = true
+activation = "prompt"
+
+[notifications.ide.diagnostics]
+enabled = true
+activation = "active"
+min_priority = "high"
+```
+
+---
+
+## Event Flow
+
+### Passive Flow
+```
+External Event
+    ↓
+NotificationSource generates Notification
+    ↓
+notification_queue.push(notification)
+    ↓
+[User sends message]
+    ↓
+handle_message():
+    - Drain notification_queue
+    - Inject into context
+    - Send to agent
+    ↓
+Agent responds (aware of notifications)
+```
+
+### Active Flow
+```
+External Event
+    ↓
+NotificationSource generates Notification (priority: High)
+    ↓
+notification_queue.push(notification)
+    ↓
+check_auto_activation():
+    - Priority >= threshold?
+    - Agent idle?
+    - Activation mode == "active"?
+    ↓
+MessageRequest::Notification(notification)
+    ↓
+handle_message():
+    - Create synthetic context
+    - Begin assistant turn
+    - Agent responds proactively
+```
+
+---
+
+## Implementation Components
+
+### 1. NotificationManager
+
+Central hub for notification handling:
+
+```rust
+pub struct NotificationManager {
+    queue: VecDeque<Notification>,
+    config: NotificationConfig,
+    coalescing_window: Duration,
+}
+
+impl NotificationManager {
+    pub fn push(&mut self, notification: Notification);
+    pub fn drain(&mut self) -> Vec<Notification>;
+    pub fn drain_for_context(&mut self) -> Option<String>;
+    pub fn has_pending(&self) -> bool;
+    pub fn should_auto_activate(&self) -> bool;
+}
+```
+
+### 2. NotificationSource Trait
+
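+Allow pluggable notification sources. Note that this trait shares its name with the `NotificationSource` enum defined under "Notification Structure", and it returns a `NotificationSourceType` that is not defined above; the two need distinct names before implementation: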
+
+```rust
+#[async_trait]
+pub trait NotificationSource {
+    fn source_type(&self) -> NotificationSourceType;
+    async fn next(&mut self) -> Option<Notification>;
+}
+
+// Implementations
+pub struct FileWatcher { ... }
+pub struct IdeNotifications { ... }
+pub struct BackgroundTaskMonitor { ... }
+```
+
+### 3. App Integration
+
+```rust
+// In App struct
+notification_manager: NotificationManager,
+notification_sources: Vec<Box<dyn NotificationSource>>,
+
+// In event loop
+loop {
+    tokio::select! {
+        // ... existing branches ...
+
+        // Notification sources
+        Some(notification) = poll_notification_sources() => {
+            self.notification_manager.push(notification);
+            if self.notification_manager.should_auto_activate() {
+                self.message_queue.push_back(
+                    MessageRequest::Notification(notification)
+                );
+            }
+        }
+    }
+}
+```
+
+### 4. UI Integration
+
+Status bar indicator for pending notifications:
+
+```
+┌─────────────────────────────────────────────────────────┐
+│ codey v0.1.0  │  tokens: 12.4k  │  [3 notifications]   │
+└─────────────────────────────────────────────────────────┘
+```
+
+Notification panel (optional):
+
+```
+┌─ Notifications ─────────────────────────────────────────┐
+│ [fs] src/main.rs modified                    2s ago    │
+│ [bg] Build completed successfully            5s ago    │
+│ [ide] 2 new diagnostics in lib.rs           10s ago    │
+└─────────────────────────────────────────────────────────┘
+```
+
+---
+
+## Comparison with Existing Patterns
+
+| Aspect | User Message | Compaction | Notification |
+|--------|--------------|------------|--------------|
+| Source | User input | Token threshold | External events |
+| Trigger | Explicit | Automatic | Configurable |
+| Urgency | Immediate | Delayed | Varies |
+| Context impact | Full turn | Replaces context | Injected |
+| User visibility | Full | Summary | Optional |
+| Agent response | Required | Required | Optional |
+
+---
+
+## Use Cases
+
+### 1. File Watcher Integration
+```
+User: "I'm going to edit the config file manually"
+Agent: "Sure, I'll wait"
+[User edits file externally]
+[Notification: config.toml modified]
+Agent: (on next message, aware of change)
+  "I see you updated config.toml. The new timeout value looks good."
+```
+
+### 2. Background Task Completion
+```
+User: "Run the full test suite in the background"
+Agent: (spawns background task)
+[User continues chatting about other things]
+[Notification: test suite completed - 3 failures]
+[UI shows notification badge]
+User: (presses Enter to discuss)
+Agent: "The test suite finished. 3 tests failed in the auth module..."
+```
+
+### 3. IDE Diagnostics
+```
+[Notification: New error in main.rs:45]
+[Auto-activation triggered]
+Agent: "I notice a new type error appeared on line 45.
+        This is likely from the change I just made. Let me fix it."
+```
+
+### 4. CI/CD Integration
+```
+[Notification: PR #123 checks failed]
+Agent: "The CI checks failed on your PR. The linting step
+        found 2 issues. Would you like me to fix them?"
+```
+
+---
+
+## Mid-Stream Injection
+
+### The Current Limitation
+
+Today's event loop has a strict ordering:
+
+```rust
+// message_queue only drains when agent is idle
+Some(request) = async { self.message_queue.pop_front() },
+    if self.input_mode == InputMode::Normal => {  // <-- BLOCKED during agent turn
+    self.handle_message(request).await?;
+}
+```
+
+This means:
+- While agent is streaming: no new messages processed
+- While agent is thinking: no new messages processed
+- While tools execute: no new messages processed
+- While awaiting approval: no new messages processed
+
+Notifications must wait until the entire turn completes.
+
+### Why Mid-Stream Injection Matters
+
+Consider these scenarios:
+
+**Scenario 1: Long-Running Tool**
+```
+Agent: "Let me run the full test suite..."
+[Tool executing: 45 seconds]
+[File changes detected - user saved a fix]
+[Notification queued... waiting... waiting...]
+[Tests finish with old code]
+Agent: "Tests failed"
+[NOW notification delivered - too late!]
+```
+
+**Scenario 2: Streaming Response**
+```
+Agent: (streaming) "Based on my analysis of the codebase..."
+[IDE: new diagnostic - type error on line 45]
+[Agent continues for 30 more seconds, unaware]
+Agent: "...and that's my recommendation"
+[NOW notification delivered]
+User: "But there's a type error now"
+```
+
+**Scenario 3: Multi-Tool Turn**
+```
+Agent: Calls tool A, then tool B, then tool C
+[Between tool A and B: critical notification arrives]
+[Agent continues with stale understanding]
+```
+
+### Injection Points
+
+Where could we inject notifications mid-stream?
+
+```
+Agent Turn Lifecycle:
+    │
+    ├─► send_request()
+    │       │
+    │       ├─► LLM streaming begins ──────────────► [Injection Point 1]
+    │       │       │                                 Between chunks?
+    │       │       ├─► text delta                    Risky: mid-thought
+    │       │       ├─► text delta
+    │       │       └─► tool_call
+    │       │
+    │       ├─► Tool execution ────────────────────► [Injection Point 2]
+    │       │       │                                 Before tool runs?
+    │       │       ├─► awaiting approval
+    │       │       ├─► tool running
+    │       │       └─► tool complete
+    │       │
+    │       ├─► submit_tool_result() ──────────────► [Injection Point 3]
+    │       │       │                                 With tool result?
+    │       │       └─► next iteration
+    │       │
+    │       └─► Finished
+    │
+    └─► Turn complete ─────────────────────────────► [Injection Point 4]
+                                                      Current behavior
+```
+
+### Injection Point Analysis
+
+#### Point 1: During LLM Streaming
+**Feasibility**: Very difficult
+- Can't modify an in-flight API request
+- Would need to cancel and restart with new context
+- Loses streaming progress, poor UX
+
+**When useful**: Critical notifications that invalidate current response
+
+#### Point 2: Before Tool Execution
+**Feasibility**: Possible
+- Tool hasn't run yet
+- Could prepend notification context to tool result
+- Or cancel tool and restart turn
+
+**Implementation**:
+```rust
+// In tool execution flow
+async fn execute_tool(&mut self, tool_call: ToolCall) -> ToolResult {
+    // Check for critical notifications before running
+    if let Some(notification) = self.check_critical_notifications() {
+        return ToolResult::Interrupted {
+            reason: notification,
+            should_restart: true,
+        };
+    }
+
+    // Proceed with tool execution
+    self.run_tool(tool_call).await
+}
+```
+
+#### Point 3: With Tool Result (Recommended)
+**Feasibility**: Good
+- Natural injection point in the request/response cycle
+- Tool result is already being assembled
+- Agent will immediately see notification in next iteration
+
+**Implementation**:
+```rust
+// When building tool result message
+fn build_tool_result(&self, call_id: &str, result: &str) -> ChatMessage {
+    let notifications = self.notification_manager.drain_for_injection();
+
+    let content = if let Some(notifs) = notifications {
+        format!(
+            "{result}\n\n<notifications>\n{notifs}\n</notifications>"
+        )
+    } else {
+        result.to_string()
+    };
+
+    ChatMessage::tool_response(call_id, content)
+}
+```
+
+**Pros**:
+- Clean integration with existing flow
+- Agent sees notification before next action
+- Doesn't break streaming or tool execution
+
+**Cons**:
+- Notification bundled with unrelated tool result
+- May confuse the model
+
+#### Point 4: Turn Boundary (Current)
+**Feasibility**: Implemented (current behavior)
+**When useful**: Non-urgent notifications, passive mode
+
+### Hybrid Approach: Priority-Based Injection
+
+Different priorities use different injection points:
+
+```rust
+pub enum NotificationPriority {
+    Low,       // Point 4: Wait for turn end
+    Normal,    // Point 4: Wait for turn end
+    High,      // Point 3: Inject with next tool result
+    Critical,  // Point 2: Interrupt before tool execution
+}
+
+impl NotificationManager {
+    pub fn check_for_injection(&mut self, injection_point: InjectionPoint)
+        -> Option<Vec<Notification>>
+    {
+        let threshold = match injection_point {
+            InjectionPoint::ToolResult => NotificationPriority::High,
+            InjectionPoint::BeforeTool => NotificationPriority::Critical,
+            InjectionPoint::TurnEnd => NotificationPriority::Low,
+        };
+
+        self.drain_at_priority(threshold)
+    }
+}
+```
+
+### Turn Interruption Model
+
+For critical notifications, we may need to interrupt and restart:
+
+```rust
+pub enum TurnInterrupt {
+    // Continue current turn, agent will see notification in context
+    InjectAndContinue { notification: Notification },
+
+    // Cancel current action, restart with notification
+    CancelAndRestart { notification: Notification },
+
+    // Complete current turn, then handle notification
+    QueueForNext { notification: Notification },
+}
+
+// In agent streaming loop
+match self.check_interrupt() {
+    Some(TurnInterrupt::CancelAndRestart { notification }) => {
+        // Stop current streaming
+        self.cancel_current_request();
+
+        // Inject notification into context
+        self.inject_notification(notification);
+
+        // Restart the turn
+        self.send_request(self.last_prompt, mode);
+    }
+    Some(TurnInterrupt::InjectAndContinue { notification }) => {
+        // Will appear in next tool result
+        self.pending_injection = Some(notification);
+    }
+    None => {
+        // Continue normally
+    }
+}
+```
+
+### Streaming Context Window
+
+A more sophisticated approach: maintain a "context window" that can be updated:
+
+```
+┌─────────────────────────────────────────────────────────┐
+│ System Prompt                                           │
+├─────────────────────────────────────────────────────────┤
+│ [Dynamic Context Window]          ◄── Can be updated    │
+│ - Current notifications                                 │
+│ - Recent file changes                                   │
+│ - IDE state                                             │
+├─────────────────────────────────────────────────────────┤
+│ Conversation History                                    │
+│ User: ...                                               │
+│ Assistant: ...                                          │
+├─────────────────────────────────────────────────────────┤
+│ Current Turn                                            │
+│ User: "fix the bug"                                     │
+│ Assistant: (streaming...)                               │
+└─────────────────────────────────────────────────────────┘
+```
+
+The "Dynamic Context Window" could be:
+- Updated between tool calls
+- Refreshed on turn restart
+- Limited size (token budget)
+
+### API Considerations
+
+The current Anthropic API doesn't support:
+- Modifying in-flight requests
+- Injecting content mid-stream
+- Multiple system messages (cleanly)
+
+Workarounds:
+1. **Tool result injection**: Append notification to tool results
+2. **Turn restart**: Cancel and re-send with new context
+3. **System prompt refresh**: Update system prompt between turns
+
+Future API features that would help:
+- Server-sent events for context updates
+- Interruptible streaming
+- Dynamic system context
+
+### State Machine View
+
+```
+                    ┌─────────────────────┐
+                    │       IDLE          │
+                    │  (accepts messages) │
+                    └──────────┬──────────┘
+                               │ user message
+                               ▼
+                    ┌─────────────────────┐
+         ┌─────────│     STREAMING       │─────────┐
+         │         │  (LLM generating)   │         │
+         │         └──────────┬──────────┘         │
+         │                    │                    │
+    critical              tool_call            finished
+    notification              │                    │
+         │                    ▼                    │
+         │         ┌─────────────────────┐         │
+         │         │   TOOL_PENDING      │         │
+         │         │  (awaiting tool)    │         │
+         │         └──────────┬──────────┘         │
+         │                    │                    │
+         │    ┌───────────────┼───────────────┐    │
+         │    │               │               │    │
+         │ critical      tool_done        high │    │
+         │ notif             │            notif│    │
+         │    │              ▼               │    │
+         │    │    ┌─────────────────────┐   │    │
+         │    │    │  TOOL_COMPLETE      │   │    │
+         │    │    │ (result ready)      │───┘    │
+         │    │    └──────────┬──────────┘        │
+         │    │               │                   │
+         │    │          inject with              │
+         │    │          tool result              │
+         │    │               │                   │
+         │    │               ▼                   │
+         │    │    ┌─────────────────────┐        │
+         │    └───►│   CONTINUING        │◄───────┘
+         │         │  (next iteration)   │
+         │         └──────────┬──────────┘
+         │                    │
+         │                    └──────────┐
+         │                               │
+         ▼                               ▼
+┌─────────────────────┐       ┌─────────────────────┐
+│  INTERRUPTED        │       │       IDLE          │
+│  (restart turn)     │──────►│  (turn complete)    │
+└─────────────────────┘       └─────────────────────┘
+```
+
+### Implementation Complexity
+
+| Injection Strategy | Complexity | UX Impact | Use Case |
+|-------------------|------------|-----------|----------|
+| Turn boundary | Low | None | Default, non-urgent |
+| Tool result | Medium | Minimal | High priority |
+| Before tool | Medium | Tool cancelled | Critical |
+| Mid-stream | High | Response restart | Emergency only |
+| Context window | High | Seamless | Ideal future |
+
+### Recommendation
+
+**Phase 1**: Tool result injection for High priority
+- Lowest risk
+- Natural integration point
+- Agent sees notification before next action
+
+**Phase 2**: Before-tool interruption for Critical
+- Can cancel unnecessary work
+- Restart with fresh context
+- Clear UX: "Interrupted by notification"
+
+**Phase 3**: Explore context window approach
+- Requires more architectural changes
+- Best long-term UX
+- May need API evolution
+
+---
+
+## Open Questions
+
+1. **Notification Persistence**
+   - Should notifications persist across sessions?
+   - Save to transcript vs. separate notification log?
+
+2. **Coalescing Strategy**
+   - How to merge rapid file changes?
+   - Window-based vs. semantic deduplication?
+
+3. **Priority Inference**
+   - Can we auto-detect priority from content?
+   - ML-based importance scoring?
+
+4. **Agent Notification Requests**
+   - Should agents be able to request notifications?
+   - "Notify me when the build finishes"
+
+5. **Notification Actions**
+   - Pre-defined actions agents can take?
+   - "Acknowledge", "Investigate", "Dismiss"?
+
+6. **Rate Limiting**
+   - Prevent notification storms
+   - Per-source rate limits?
+
+7. **Context Budget**
+   - How many notification tokens to allow?
+   - Summarization for old notifications?
+
+---
+
+## Phased Implementation
+
+### Phase 1: Foundation
+- [ ] Define `Notification` types and structures
+- [ ] Add `NotificationBlock` to transcript
+- [ ] Create `NotificationManager` with basic queue
+- [ ] Add `MessageRequest::Notification` variant
+
+### Phase 2: Integration
+- [ ] Inject notifications into agent context
+- [ ] Add passive accumulation mode
+- [ ] Status bar notification indicator
+- [ ] Configuration options
+
+### Phase 3: Sources
+- [ ] File watcher notification source
+- [ ] Background task completion notifications
+- [ ] IDE diagnostic notifications
+
+### Phase 4: Activation
+- [ ] Prompt mode with UI
+- [ ] Auto-activation for high priority
+- [ ] Notification panel UI
+
+### Phase 5: Advanced
+- [ ] Coalescing and deduplication
+- [ ] Notification persistence
+- [ ] Agent-requested notifications
+- [ ] External webhook integration
+
+---
+
+## Relationship to Sub-Agent Architecture
+
+The Notifications primitive complements the sub-agent work:
+
+- **Sub-agents** spawn and run to completion, returning results as tool output
+- **Notifications** signal when background work completes
+- Combined: Sub-agent spawns in background → Notification when done → Agent can discuss results
+
+```
+Primary Agent
+    │
+    ├─► spawn_background_task("run tests")
+    │       └──────────────────────► Background Runner
+    │                                       │
+    ├─► continues conversation              ├─► running...
+    │   with user                           │
+    │                                       └─► complete
+    │                                       │
+    ◄───────── Notification ────────────────┘
+    │   "Tests completed: 2 failures"
+    │
+    ├─► [Prompt mode] User sees notification
+    │   OR
+    ├─► [Active mode] Agent auto-responds
+```
+
+This creates a complete async work model where:
+1. Sub-agents handle the actual background work
+2. Notifications handle the signaling/awareness
+3. The primary conversation remains responsive

From e5eadf72e3e0bf040884ce743a5cad421d864368 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 24 Jan 2026 18:48:16 +0000
Subject: [PATCH 2/7] research: Add pseudo-tool injection approach for
 notifications

Explores using synthetic tool calls to inject notifications mid-turn,
leveraging existing tool infrastructure. Key insight: notifications
can be injected between tool calls without the agent explicitly checking.
---
 research/notifications-primitive.md | 43 +++++++++++++++--------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/research/notifications-primitive.md b/research/notifications-primitive.md
index 2a39ebf..0f74839 100644
--- a/research/notifications-primitive.md
+++ b/research/notifications-primitive.md
@@ -612,32 +612,33 @@ fn build_tool_result(&self, call_id: &str, result: &str) -> ChatMessage {
 **Feasibility**: Implemented (current behavior)
 **When useful**: Non-urgent notifications, passive mode
 
-### Hybrid Approach: Priority-Based Injection
+### Pseudo-Tool Injection (Recommended Approach)
 
-Different priorities use different injection points:
+Instead of complicating injection with priorities and interruption logic, we can leverage the existing tool use pattern. Since the agent already expects interspersed tool calls and results, we can **synthesize** tool calls to carry notifications - the agent doesn't need to remember to check for them.
 
-```rust
-pub enum NotificationPriority {
-    Low,       // Point 4: Wait for turn end
-    Normal,    // Point 4: Wait for turn end
-    High,      // Point 3: Inject with next tool result
-    Critical,  // Point 2: Interrupt before tool execution
-}
+#### The Insight
 
-impl NotificationManager {
-    pub fn check_for_injection(&mut self, injection_point: InjectionPoint)
-        -> Option<Vec<Notification>>
-    {
-        let threshold = match injection_point {
-            InjectionPoint::ToolResult => NotificationPriority::High,
-            InjectionPoint::BeforeTool => NotificationPriority::Critical,
-            InjectionPoint::TurnEnd => NotificationPriority::Low,
-        };
+The agent's context already looks like this during a multi-tool turn:
+
+```
+User: "fix the bug and run tests"
+Assistant: [text] "Let me fix that..." [tool_call: Edit]
+Tool Result: "File updated successfully"
+Assistant: [text] "Now running tests..." [tool_call: Bash]
+Tool Result: "3 tests passed, 1 failed"
+Assistant: [text] "One test failed, let me check..."
+```
+
+We can inject a **synthetic tool call + result** that the agent didn't explicitly request:
 
-        self.drain_at_priority(threshold)
-    }
-}
 ```
+User: "fix the bug and run tests"
+Assistant: [text] "Let me fix that..." [tool_call: Edit]
+Tool Result: "File updated successfully"
+                                            ← INJECT HERE
+[Synthetic tool_call: _notification]        ← We add this
+[Synthetic result: "File src/lib.rs was     ← And this
+ modified externally"]
 
 ### Turn Interruption Model
 

From 9136de10ea761bd4103e443c5f9ec9663d802312 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 24 Jan 2026 18:57:27 +0000
Subject: [PATCH 3/7] research: Add tool result augmentation as primary
 approach

Based on observation of Claude Code's <system-reminder> pattern:
- Approach A (recommended): Append notifications to tool results with XML
- Approach B (alternative): Synthetic tool call injection

Key insight: tool results are unstructured text, so we can append
notification content directly with XML delimiters. No fake tool calls
needed - same call_id, just richer content.
---
 research/notifications-primitive.md | 169 ++++++++++++++++++++++++----
 1 file changed, 144 insertions(+), 25 deletions(-)

diff --git a/research/notifications-primitive.md b/research/notifications-primitive.md
index 0f74839..1714ddb 100644
--- a/research/notifications-primitive.md
+++ b/research/notifications-primitive.md
@@ -612,47 +612,166 @@ fn build_tool_result(&self, call_id: &str, result: &str) -> ChatMessage {
 **Feasibility**: Implemented (current behavior)
 **When useful**: Non-urgent notifications, passive mode
 
-### Pseudo-Tool Injection (Recommended Approach)
+### Approach A: Tool Result Augmentation (Recommended)
 
-Instead of complicating injection with priorities and interruption logic, we can leverage the existing tool use pattern. Since the agent already expects interspersed tool calls and results, we can **synthesize** tool calls to carry notifications - the agent doesn't need to remember to check for them.
+**Observed in**: Claude Code's background agent system (`<system-reminder>` tags)
 
-#### The Insight
+The simplest approach: append notification content directly to existing tool results using XML delimiters. No synthetic tool calls, no new message types - just string concatenation with semantic markup.
 
-The agent's context already looks like this during a multi-tool turn:
+#### How It Works
+
+Tool results in the Anthropic API are text content, not parsed JSON. This means we can append arbitrary content to them:
 
 ```
-User: "fix the bug and run tests"
-Assistant: [text] "Let me fix that..." [tool_call: Edit]
-Tool Result: "File updated successfully"
-Assistant: [text] "Now running tests..." [tool_call: Bash]
-Tool Result: "3 tests passed, 1 failed"
-Assistant: [text] "One test failed, let me check..."
+Assistant: [tool_call id="edit_1" name="Edit"]
+ToolResult(id="edit_1"): "File updated successfully
+
+<notification source=\"file_watcher\">
+src/lib.rs was modified externally
+</notification>"
 ```
 
-We can inject a **synthetic tool call + result** that the agent didn't explicitly request:
+The agent receives this as a single tool result and interprets the XML semantically. Same `call_id` - just richer content.
 
+#### Implementation
+
+```rust
+fn submit_tool_result_with_notifications(
+    &mut self,
+    call_id: &str,
+    result: &str,
+    notifications: &[Notification],
+) {
+    let content = if notifications.is_empty() {
+        result.to_string()
+    } else {
+        let notif_xml = notifications.iter()
+            .map(|n| format!(
+                "<notification source=\"{:?}\">\n{}\n</notification>",
+                n.source, n.to_message()
+            ))
+            .collect::<Vec<_>>()
+            .join("\n");
+
+        format!("{result}\n\n{notif_xml}")
+    };
+
+    self.messages.push(ChatMessage::tool_response(call_id, &content));
+}
+```
+
+#### System Prompt Addition
+
+```
+You may see <notification> tags in tool results. These are external events
+(file changes, background task completions, etc.) that occurred while you
+were working. Consider them when deciding your next action.
 ```
-User: "fix the bug and run tests"
-Assistant: [text] "Let me fix that..." [tool_call: Edit]
-Tool Result: "File updated successfully"
-                                            ← INJECT HERE
-[Synthetic tool_call: _notification]        ← We add this
-[Synthetic result: "File src/lib.rs was     ← And this
- modified externally"]
 
-### Turn Interruption Model
+#### Observed Example (Claude Code)
+
+When a user sends a message while the agent is mid-turn, it appears appended to tool results:
+
+```
+ToolResult(id="edit_1"): "<error>String not found...</error>
+
+<system-reminder>
+The user sent the following message:
+what about this other approach?
+
+Please address this message and continue with your tasks.
+</system-reminder>"
+```
+
+The agent sees this naturally and incorporates the message without needing a separate turn.
+
+#### Why This Works
+
+1. **API compatible**: Tool results are unstructured text - append anything
+2. **No ID management**: Reuses existing tool call ID
+3. **Proven pattern**: Claude Code uses this for mid-turn user message injection
+4. **Clear boundaries**: XML makes tool output vs notification unambiguous
+5. **Zero overhead**: Just string formatting
+
+---
+
+### Approach B: Synthetic Tool Injection (Alternative)
 
-For critical notifications, we may need to interrupt and restart:
+For cases where notifications should appear as distinct "events" in the message history.
+
+#### How It Works
+
+Inject a synthetic tool call + result pair:
+
+```
+Assistant: [tool_call id="edit_1" name="Edit"]
+ToolResult(id="edit_1"): "Success"
+Assistant: [tool_call id="notif_1" name="_system_notification"]  ← Synthetic
+ToolResult(id="notif_1"): "File src/lib.rs modified externally"  ← Synthetic
+```
+
+#### Implementation
+
+```rust
+pub const NOTIFICATION_TOOL: &str = "_system_notification";
+
+fn inject_notification(&mut self, notification: Notification) {
+    let call_id = format!("notif_{}", Uuid::new_v4());
+
+    // Synthetic tool call
+    self.messages.push(ChatMessage {
+        role: ChatRole::Assistant,
+        content: MessageContent::default()
+            .append(ContentPart::ToolCall(GenaiToolCall {
+                id: call_id.clone(),
+                name: NOTIFICATION_TOOL.to_string(),
+                arguments: "{}".to_string(),
+            })),
+        options: None,
+    });
+
+    // Synthetic tool result
+    self.messages.push(ChatMessage::tool_response(
+        &call_id,
+        &notification.to_message(),
+    ));
+}
+```
+
+#### Trade-offs
+
+| Aspect | Tool Result Augmentation | Synthetic Tool Injection |
+|--------|-------------------------|-------------------------|
+| Complexity | Low | Medium |
+| API changes | None | Tool definition needed |
+| Message count | Same | +2 per notification |
+| Transcript clarity | Embedded in tool | Distinct events |
+| Proven | Yes (Claude Code) | Theoretical |
+| Token overhead | Minimal | Higher |
+
+---
+
+### Recommendation
+
+**Use Approach A** (Tool Result Augmentation) as primary:
+- Proven in production (Claude Code)
+- Simplest implementation
+- No schema changes needed
+
+**Consider Approach B** if:
+- Notifications need distinct transcript entries
+- Agent should explicitly acknowledge notifications
+
+---
+
+### Other Considered Approaches
+
+#### Turn Interruption (for critical notifications)
 
 ```rust
 pub enum TurnInterrupt {
-    // Continue current turn, agent will see notification in context
     InjectAndContinue { notification: Notification },
-
-    // Cancel current action, restart with notification
     CancelAndRestart { notification: Notification },
-
-    // Complete current turn, then handle notification
     QueueForNext { notification: Notification },
 }
 

From 52d663557301259d09494f7e07c6075a4a7ee7bd Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 24 Jan 2026 19:47:15 +0000
Subject: [PATCH 4/7] research: Rewrite notifications doc based on discussion

Streamlined to focus on key findings:
- Tool results are unstructured text (key insight)
- Approach A: Append to tool results with XML (recommended, proven in Claude Code)
- Approach B: Synthetic tool injection (alternative)
- Comparison and recommendation

Removed verbose sections on priorities, activation modes, state machines.
---
 research/notifications-primitive.md | 948 +++-------------------------
 1 file changed, 84 insertions(+), 864 deletions(-)

diff --git a/research/notifications-primitive.md b/research/notifications-primitive.md
index 1714ddb..2fe3431 100644
--- a/research/notifications-primitive.md
+++ b/research/notifications-primitive.md
@@ -2,638 +2,84 @@
 
 ## Problem Statement
 
-The current system has a rigid message flow:
-1. User sends a message (terminal input)
+The current message flow is rigid:
+1. User sends a message
 2. Agent responds (streaming + tool calls)
 3. Compaction can inject summaries (special case)
 
-This model breaks down when we want external events to influence the agent:
-- File system changes (watched files modified)
-- Background task completion (sub-agent finished, build completed)
-- IDE events (cursor moved, file opened, diagnostics changed)
-- Timer/scheduled events (reminder, timeout)
-- External webhooks (CI status, PR review)
-
-Currently, these events either:
-- Get ignored entirely
-- Require user to manually ask about them
-- Use hacky workarounds (fake user messages)
-
-We need a first-class primitive for **Notifications** - events from outside the user-agent conversation that can be injected into context and optionally activate the agent.
-
----
-
-## Current Architecture Analysis
-
-### Message Flow Today
-
-```
-Terminal Event (user input)
-    ↓
-MessageRequest::User(content, turn_id)
-    ↓
-message_queue.push_back(...)
-    ↓
-handle_message() → agent.send_request()
-    ↓
-Agent streams response
-```
-
-### Context Management
-
-The agent maintains `messages: Vec<ChatMessage>` which maps to LLM API format:
-- `ChatRole::System` - system prompt (first message)
-- `ChatRole::User` - user messages
-- `ChatRole::Assistant` - agent responses (text, tool calls)
-
-The transcript uses `Turn` with `Role::User | Role::Assistant | Role::System` and polymorphic `Block` content.
-
-### Existing "Injection" Patterns
-
-1. **Compaction** (`MessageRequest::Compaction`)
-   - Triggered by token threshold
-   - Agent responds in `RequestMode::Compaction`
-   - Result replaces context via `reset_with_summary()`
-   - Appears as `CompactionBlock` in transcript
-
-2. **IDE Events** (`handle_ide_event()`)
-   - Currently just updates `selected_text` state
-   - No injection into agent context
-   - Agent only sees selection if it requests it
-
-3. **Tool Results**
-   - Injected as tool response messages
-   - Part of the normal request/response flow
-
----
-
-## Design: Notifications Primitive
-
-### Core Concept
-
-A **Notification** is an event from outside the conversation that:
-1. Has a **source** (what system generated it)
-2. Has a **priority** (how urgent/important)
-3. Has **content** (what happened)
-4. Has an optional **action** (what the agent might do)
-
-Notifications differ from user messages in that they:
-- May not require immediate response
-- Can be batched/coalesced
-- Have semantic meaning (type-based handling)
-- Can be filtered/prioritized
-
-### Notification Structure
-
-```rust
-pub struct Notification {
-    pub id: Uuid,
-    pub source: NotificationSource,
-    pub priority: NotificationPriority,
-    pub content: NotificationContent,
-    pub timestamp: DateTime<Utc>,
-    pub requires_response: bool,
-}
-
-pub enum NotificationSource {
-    FileSystem,       // File watcher events
-    BackgroundTask,   // Sub-agent, build, test completion
-    IDE,              // Editor events (diagnostics, navigation)
-    Timer,            // Scheduled/timeout events
-    External,         // Webhooks, CI, etc.
-    System,           // Internal system events
-}
-
-pub enum NotificationPriority {
-    Low,              // Informational, can be batched
-    Normal,           // Standard priority
-    High,             // Should interrupt current work
-    Critical,         // Must be handled immediately
-}
-
-pub enum NotificationContent {
-    FileChanged { path: PathBuf, change_type: ChangeType },
-    TaskCompleted { task_id: String, result: String },
-    DiagnosticsUpdated { uri: String, diagnostics: Vec<Diagnostic> },
-    TimerFired { name: String },
-    Custom { event_type: String, payload: serde_json::Value },
-}
-```
-
-### Transcript Integration
-
-New block type for notifications:
-
-```rust
-pub struct NotificationBlock {
-    pub notification: Notification,
-    pub status: Status,
-    pub acknowledged: bool,
-}
-
-impl Block for NotificationBlock {
-    fn kind(&self) -> BlockType { BlockType::Notification }
-    // ...
-}
-
-pub enum BlockType {
-    Text,
-    Thinking,
-    Tool,
-    Compaction,
-    Notification,  // NEW
-}
-```
-
-### Message Queue Integration
-
-Extend the message request enum:
+External events (file changes, background task completion, IDE diagnostics) have no way to enter the agent's context mid-turn. The message queue is blocked while the agent is working:
 
 ```rust
-enum MessageRequest {
-    User(String, usize),
-    Compaction,
-    Command(String, usize),
-    Notification(Notification),  // NEW
+Some(request) = async { self.message_queue.pop_front() },
+    if self.input_mode == InputMode::Normal => {  // BLOCKED during agent turn
+    self.handle_message(request).await?;
 }
 ```
 
-### Agent Context Injection
-
-Notifications need to appear in the agent's context. Options:
-
-**Option A: As User Messages (Simple)**
-```rust
-// In agent.send_request() or restore_from_transcript()
-ChatMessage::user(format!(
-    "[NOTIFICATION from {source}]: {content}"
-))
-```
-
-Pros: Works with existing API, no special handling
-Cons: Pollutes user message stream, awkward formatting
-
-**Option B: As System Messages (Semantic)**
-```rust
-// Inject after system prompt, before conversation
-ChatMessage::system(format!(
-    "Notification ({source}, {priority}): {content}"
-))
-```
-
-Pros: Semantically correct, separate from user input
-Cons: Multiple system messages may confuse models
-
-**Option C: Aggregated Context Block (Recommended)**
-```rust
-// Single "notifications context" injected before user's message
-let notifications_context = format!(
-    "<notifications>\n{}\n</notifications>",
-    pending_notifications.iter()
-        .map(|n| format!("- [{:?}] {}", n.source, n.content))
-        .collect::<Vec<_>>()
-        .join("\n")
-);
-
-// Prepend to user message or inject as separate user message
-ChatMessage::user(notifications_context)
-```
-
-Pros: Batched, clear delineation, efficient token use
-Cons: Still a "fake" user message
-
-**Option D: Extended Thinking Prompt (Cleanest)**
-```rust
-// Add to system prompt dynamically
-let dynamic_context = format!(
-    "\n\n## Active Notifications\n{notifications}\n\n\
-     Consider these when responding. Not all require action."
-);
-```
-
-Pros: Natural integration, doesn't pollute conversation
-Cons: Requires system prompt regeneration
+We need a way to inject **Notifications** into the agent's context without waiting for the turn to complete.
 
 ---
 
-## Activation Modes
+## Key Insight: Tool Results Are Unstructured Text
 
-Notifications can trigger different behaviors:
-
-### 1. Passive (Accumulate)
-Notifications queue up silently. Agent sees them on next user message.
-
-```
-User: "fix the bug"
-[Notifications silently accumulated]
-Agent: (sees notifications in context) "I notice the file changed..."
-```
-
-### 2. Prompt (Notify User)
-Show notification to user, let them decide to activate agent.
-
-```
-┌─────────────────────────────────────────────────┐
-│ [!] Build completed with 3 warnings             │
-│     Press Enter to discuss, Esc to dismiss     │
-└─────────────────────────────────────────────────┘
-```
-
-### 3. Active (Auto-Activate)
-High-priority notifications automatically trigger agent response.
-
-```
-[Critical: Tests failing after file save]
-    ↓
-Agent automatically activates
-    ↓
-"I see the tests are now failing. Let me investigate..."
-```
-
-### Configuration
-
-```toml
-[notifications]
-# Global enable/disable
-enabled = true
-
-# Per-source configuration
-[notifications.file_system]
-enabled = true
-activation = "passive"
-debounce_ms = 500
-
-[notifications.background_task]
-enabled = true
-activation = "prompt"
-
-[notifications.ide.diagnostics]
-enabled = true
-activation = "active"
-min_priority = "high"
-```
-
----
-
-## Event Flow
-
-### Passive Flow
-```
-External Event
-    ↓
-NotificationSource generates Notification
-    ↓
-notification_queue.push(notification)
-    ↓
-[User sends message]
-    ↓
-handle_message():
-    - Drain notification_queue
-    - Inject into context
-    - Send to agent
-    ↓
-Agent responds (aware of notifications)
-```
-
-### Active Flow
-```
-External Event
-    ↓
-NotificationSource generates Notification (priority: High)
-    ↓
-notification_queue.push(notification)
-    ↓
-check_auto_activation():
-    - Priority >= threshold?
-    - Agent idle?
-    - Activation mode == "active"?
-    ↓
-MessageRequest::Notification(notification)
-    ↓
-handle_message():
-    - Create synthetic context
-    - Begin assistant turn
-    - Agent responds proactively
-```
-
----
-
-## Implementation Components
-
-### 1. NotificationManager
-
-Central hub for notification handling:
-
-```rust
-pub struct NotificationManager {
-    queue: VecDeque<Notification>,
-    config: NotificationConfig,
-    coalescing_window: Duration,
-}
-
-impl NotificationManager {
-    pub fn push(&mut self, notification: Notification);
-    pub fn drain(&mut self) -> Vec<Notification>;
-    pub fn drain_for_context(&mut self) -> Option<String>;
-    pub fn has_pending(&self) -> bool;
-    pub fn should_auto_activate(&self) -> bool;
-}
-```
-
-### 2. NotificationSource Trait
-
-Allow pluggable notification sources:
-
-```rust
-#[async_trait]
-pub trait NotificationSource {
-    fn source_type(&self) -> NotificationSourceType;
-    async fn next(&mut self) -> Option<Notification>;
+Tool definitions in the Anthropic API use JSON schemas for structured input:
+```json
+{
+  "name": "Edit",
+  "input_schema": { "type": "object", "properties": { ... } }
 }
-
-// Implementations
-pub struct FileWatcher { ... }
-pub struct IdeNotifications { ... }
-pub struct BackgroundTaskMonitor { ... }
 ```
 
-### 3. App Integration
-
-```rust
-// In App struct
-notification_manager: NotificationManager,
-notification_sources: Vec<Box<dyn NotificationSource>>,
-
-// In event loop
-loop {
-    tokio::select! {
-        // ... existing branches ...
-
-        // Notification sources
-        Some(notification) = poll_notification_sources() => {
-            self.notification_manager.push(notification);
-            if self.notification_manager.should_auto_activate() {
-                self.message_queue.push_back(
-                    MessageRequest::Notification(notification)
-                );
-            }
-        }
-    }
+But tool **results** are just text content:
+```json
+{
+  "type": "tool_result",
+  "tool_use_id": "toolu_abc123",
+  "content": "File updated successfully."
 }
 ```
 
-### 4. UI Integration
-
-Status bar indicator for pending notifications:
-
-```
-┌─────────────────────────────────────────────────────────┐
-│ codey v0.1.0  │  tokens: 12.4k  │  [3 notifications]   │
-└─────────────────────────────────────────────────────────┘
-```
-
-Notification panel (optional):
-
-```
-┌─ Notifications ─────────────────────────────────────────┐
-│ [fs] src/main.rs modified                    2s ago    │
-│ [bg] Build completed successfully            5s ago    │
-│ [ide] 2 new diagnostics in lib.rs           10s ago    │
-└─────────────────────────────────────────────────────────┘
-```
-
----
-
-## Comparison with Existing Patterns
-
-| Aspect | User Message | Compaction | Notification |
-|--------|--------------|------------|--------------|
-| Source | User input | Token threshold | External events |
-| Trigger | Explicit | Automatic | Configurable |
-| Urgency | Immediate | Delayed | Varies |
-| Context impact | Full turn | Replaces context | Injected |
-| User visibility | Full | Summary | Optional |
-| Agent response | Required | Required | Optional |
-
----
-
-## Use Cases
-
-### 1. File Watcher Integration
-```
-User: "I'm going to edit the config file manually"
-Agent: "Sure, I'll wait"
-[User edits file externally]
-[Notification: config.toml modified]
-Agent: (on next message, aware of change)
-  "I see you updated config.toml. The new timeout value looks good."
-```
-
-### 2. Background Task Completion
-```
-User: "Run the full test suite in the background"
-Agent: (spawns background task)
-[User continues chatting about other things]
-[Notification: test suite completed - 3 failures]
-[UI shows notification badge]
-User: (presses Enter to discuss)
-Agent: "The test suite finished. 3 tests failed in the auth module..."
-```
-
-### 3. IDE Diagnostics
-```
-[Notification: New error in main.rs:45]
-[Auto-activation triggered]
-Agent: "I notice a new type error appeared on line 45.
-        This is likely from the change I just made. Let me fix it."
-```
-
-### 4. CI/CD Integration
-```
-[Notification: PR #123 checks failed]
-Agent: "The CI checks failed on your PR. The linting step
-        found 2 issues. Would you like me to fix them?"
-```
+This means we can append arbitrary content to tool results. The model interprets it semantically based on formatting (like XML tags).
 
 ---
 
-## Mid-Stream Injection
-
-### The Current Limitation
-
-Today's event loop has a strict ordering:
-
-```rust
-// message_queue only drains when agent is idle
-Some(request) = async { self.message_queue.pop_front() },
-    if self.input_mode == InputMode::Normal => {  // <-- BLOCKED during agent turn
-    self.handle_message(request).await?;
-}
-```
-
-This means:
-- While agent is streaming: no new messages processed
-- While agent is thinking: no new messages processed
-- While tools execute: no new messages processed
-- While awaiting approval: no new messages processed
-
-Notifications must wait until the entire turn completes.
-
-### Why Mid-Stream Injection Matters
+## Approach A: Tool Result Augmentation (Recommended)
 
-Consider these scenarios:
+**Observed in**: Claude Code's `<system-reminder>` pattern for mid-turn user messages.
 
-**Scenario 1: Long-Running Tool**
-```
-Agent: "Let me run the full test suite..."
-[Tool executing: 45 seconds]
-[File changes detected - user saved a fix]
-[Notification queued... waiting... waiting...]
-[Tests finish with old code]
-Agent: "Tests failed"
-[NOW notification delivered - too late!]
-```
+Append notification content directly to tool results using XML delimiters. Same `call_id`, just richer content.
 
-**Scenario 2: Streaming Response**
-```
-Agent: (streaming) "Based on my analysis of the codebase..."
-[IDE: new diagnostic - type error on line 45]
-[Agent continues for 30 more seconds, unaware]
-Agent: "...and that's my recommendation"
-[NOW notification delivered]
-User: "But there's a type error now"
-```
+### How It Works
 
-**Scenario 3: Multi-Tool Turn**
 ```
-Agent: Calls tool A, then tool B, then tool C
-[Between tool A and B: critical notification arrives]
-[Agent continues with stale understanding]
-```
-
-### Injection Points
-
-Where could we inject notifications mid-stream?
+Assistant: [tool_call id="edit_1" name="Edit"]
+ToolResult(id="edit_1"): "File updated successfully
 
-```
-Agent Turn Lifecycle:
-    │
-    ├─► send_request()
-    │       │
-    │       ├─► LLM streaming begins ──────────────► [Injection Point 1]
-    │       │       │                                 Between chunks?
-    │       │       ├─► text delta                    Risky: mid-thought
-    │       │       ├─► text delta
-    │       │       └─► tool_call
-    │       │
-    │       ├─► Tool execution ────────────────────► [Injection Point 2]
-    │       │       │                                 Before tool runs?
-    │       │       ├─► awaiting approval
-    │       │       ├─► tool running
-    │       │       └─► tool complete
-    │       │
-    │       ├─► submit_tool_result() ──────────────► [Injection Point 3]
-    │       │       │                                 With tool result?
-    │       │       └─► next iteration
-    │       │
-    │       └─► Finished
-    │
-    └─► Turn complete ─────────────────────────────► [Injection Point 4]
-                                                      Current behavior
+<notification source="file_watcher">
+src/lib.rs was modified externally
+</notification>"
 ```
 
-### Injection Point Analysis
+The agent sees this as a single tool result and interprets the XML naturally.
 
-#### Point 1: During LLM Streaming
-**Feasibility**: Very difficult
-- Can't modify an in-flight API request
-- Would need to cancel and restart with new context
-- Loses streaming progress, poor UX
+### Observed Example (Claude Code)
 
-**When useful**: Critical notifications that invalidate current response
+When a user sends a message while the agent is mid-turn executing tools:
 
-#### Point 2: Before Tool Execution
-**Feasibility**: Possible
-- Tool hasn't run yet
-- Could prepend notification context to tool result
-- Or cancel tool and restart turn
-
-**Implementation**:
-```rust
-// In tool execution flow
-async fn execute_tool(&mut self, tool_call: ToolCall) -> ToolResult {
-    // Check for critical notifications before running
-    if let Some(notification) = self.check_critical_notifications() {
-        return ToolResult::Interrupted {
-            reason: notification,
-            should_restart: true,
-        };
-    }
-
-    // Proceed with tool execution
-    self.run_tool(tool_call).await
-}
-```
-
-#### Point 3: With Tool Result (Recommended)
-**Feasibility**: Good
-- Natural injection point in the request/response cycle
-- Tool result is already being assembled
-- Agent will immediately see notification in next iteration
-
-**Implementation**:
-```rust
-// When building tool result message
-fn build_tool_result(&self, call_id: &str, result: &str) -> ChatMessage {
-    let notifications = self.notification_manager.drain_for_injection();
-
-    let content = if let Some(notifs) = notifications {
-        format!(
-            "{result}\n\n<notifications>\n{notifs}\n</notifications>"
-        )
-    } else {
-        result.to_string()
-    };
-
-    ChatMessage::tool_response(call_id, content)
-}
 ```
+ToolResult(id="edit_1"): "<error>String not found...</error>
 
-**Pros**:
-- Clean integration with existing flow
-- Agent sees notification before next action
-- Doesn't break streaming or tool execution
-
-**Cons**:
-- Notification bundled with unrelated tool result
-- May confuse the model
-
-#### Point 4: Turn Boundary (Current)
-**Feasibility**: Implemented (current behavior)
-**When useful**: Non-urgent notifications, passive mode
-
-### Approach A: Tool Result Augmentation (Recommended)
-
-**Observed in**: Claude Code's background agent system (`<system-reminder>` tags)
-
-The simplest approach: append notification content directly to existing tool results using XML delimiters. No synthetic tool calls, no new message types - just string concatenation with semantic markup.
-
-#### How It Works
-
-Tool results in the Anthropic API are text content, not parsed JSON. This means we can append arbitrary content to them:
-
-```
-Assistant: [tool_call id="edit_1" name="Edit"]
-ToolResult(id="edit_1"): "File updated successfully
+<system-reminder>
+The user sent the following message:
+what about this other approach?
 
-<notification source=\"file_watcher\">
-src/lib.rs was modified externally
-</notification>"
+Please address this message and continue with your tasks.
+</system-reminder>"
 ```
 
-The agent receives this as a single tool result and interprets the XML semantically. Same `call_id` - just richer content.
+The notification is concatenated to the tool result. The agent incorporates it without needing a separate turn.
 
-#### Implementation
+### Implementation
 
 ```rust
 fn submit_tool_result_with_notifications(
@@ -660,7 +106,7 @@ fn submit_tool_result_with_notifications(
 }
 ```
 
-#### System Prompt Addition
+### System Prompt Addition
 
 ```
 You may see <notification> tags in tool results. These are external events
@@ -668,38 +114,21 @@ You may see <notification> tags in tool results. These are external events
 were working. Consider them when deciding your next action.
 ```
 
-#### Observed Example (Claude Code)
-
-When a user sends a message while the agent is mid-turn, it appears appended to tool results:
+### Why This Works
 
-```
-ToolResult(id="edit_1"): "<error>String not found...</error>
-
-<system-reminder>
-The user sent the following message:
-what about this other approach?
-
-Please address this message and continue with your tasks.
-</system-reminder>"
-```
-
-The agent sees this naturally and incorporates the message without needing a separate turn.
-
-#### Why This Works
-
-1. **API compatible**: Tool results are unstructured text - append anything
+1. **API compatible**: Tool results are unstructured text
 2. **No ID management**: Reuses existing tool call ID
-3. **Proven pattern**: Claude Code uses this for mid-turn user message injection
-4. **Clear boundaries**: XML makes tool output vs notification unambiguous
+3. **Proven**: Claude Code uses this pattern in production
+4. **Clear boundaries**: XML delimits tool output vs notification
 5. **Zero overhead**: Just string formatting
 
 ---
 
-### Approach B: Synthetic Tool Injection (Alternative)
+## Approach B: Synthetic Tool Injection (Alternative)
 
-For cases where notifications should appear as distinct "events" in the message history.
+For cases where notifications should appear as distinct events in the message history rather than embedded in tool results.
 
-#### How It Works
+### How It Works
 
 Inject a synthetic tool call + result pair:
 
@@ -710,7 +139,7 @@ Assistant: [tool_call id="notif_1" name="_system_notification"]  ← Synthetic
 ToolResult(id="notif_1"): "File src/lib.rs modified externally"  ← Synthetic
 ```
 
-#### Implementation
+### Implementation
 
 ```rust
 pub const NOTIFICATION_TOOL: &str = "_system_notification";
@@ -738,7 +167,18 @@ fn inject_notification(&mut self, notification: Notification) {
 }
 ```
 
-#### Trade-offs
+Would also need a tool definition:
+```rust
+Tool {
+    name: "_system_notification",
+    description: "System-generated notifications. You do not call this tool;
+                  the system uses it to inform you of external events.",
+}
+```
+
+---
+
+## Comparison
 
 | Aspect | Tool Result Augmentation | Synthetic Tool Injection |
 |--------|-------------------------|-------------------------|
@@ -751,12 +191,12 @@ fn inject_notification(&mut self, notification: Notification) {
 
 ---
 
-### Recommendation
+## Recommendation
 
-**Use Approach A** (Tool Result Augmentation) as primary:
-- Proven in production (Claude Code)
+**Use Approach A** (Tool Result Augmentation):
+- Proven in production
 - Simplest implementation
-- No schema changes needed
+- No schema changes
 
 **Consider Approach B** if:
 - Notifications need distinct transcript entries
@@ -764,254 +204,34 @@ fn inject_notification(&mut self, notification: Notification) {
 
 ---
 
-### Other Considered Approaches
+## Injection Timing
 
-#### Turn Interruption (for critical notifications)
+The natural injection point is **after any tool completes**:
 
 ```rust
-pub enum TurnInterrupt {
-    InjectAndContinue { notification: Notification },
-    CancelAndRestart { notification: Notification },
-    QueueForNext { notification: Notification },
-}
+ToolEvent::Completed { agent_id, call_id, content } => {
+    // Drain pending notifications
+    let notifications = self.notification_manager.drain();
 
-// In agent streaming loop
-match self.check_interrupt() {
-    Some(TurnInterrupt::CancelAndRestart { notification }) => {
-        // Stop current streaming
-        self.cancel_current_request();
-
-        // Inject notification into context
-        self.inject_notification(notification);
-
-        // Restart the turn
-        self.send_request(self.last_prompt, mode);
-    }
-    Some(TurnInterrupt::InjectAndContinue { notification }) => {
-        // Will appear in next tool result
-        self.pending_injection = Some(notification);
-    }
-    None => {
-        // Continue normally
-    }
+    // Submit result with notifications appended
+    agent.submit_tool_result_with_notifications(
+        &call_id,
+        &content,
+        &notifications,
+    );
 }
 ```
 
-### Streaming Context Window
-
-A more sophisticated approach: maintain a "context window" that can be updated:
-
-```
-┌─────────────────────────────────────────────────────────┐
-│ System Prompt                                           │
-├─────────────────────────────────────────────────────────┤
-│ [Dynamic Context Window]          ◄── Can be updated    │
-│ - Current notifications                                 │
-│ - Recent file changes                                   │
-│ - IDE state                                             │
-├─────────────────────────────────────────────────────────┤
-│ Conversation History                                    │
-│ User: ...                                               │
-│ Assistant: ...                                          │
-├─────────────────────────────────────────────────────────┤
-│ Current Turn                                            │
-│ User: "fix the bug"                                     │
-│ Assistant: (streaming...)                               │
-└─────────────────────────────────────────────────────────┘
-```
-
-The "Dynamic Context Window" could be:
-- Updated between tool calls
-- Refreshed on turn restart
-- Limited size (token budget)
-
-### API Considerations
-
-Current Anthropic API doesn't support:
-- Modifying in-flight requests
-- Injecting content mid-stream
-- Multiple system messages (cleanly)
-
-Workarounds:
-1. **Tool result injection**: Append notification to tool results
-2. **Turn restart**: Cancel and re-send with new context
-3. **System prompt refresh**: Update system prompt between turns
-
-Future API features that would help:
-- Server-sent events for context updates
-- Interruptible streaming
-- Dynamic system context
-
-### State Machine View
-
-```
-                    ┌─────────────────────┐
-                    │       IDLE          │
-                    │  (accepts messages) │
-                    └──────────┬──────────┘
-                               │ user message
-                               ▼
-                    ┌─────────────────────┐
-         ┌─────────│     STREAMING       │─────────┐
-         │         │  (LLM generating)   │         │
-         │         └──────────┬──────────┘         │
-         │                    │                    │
-    critical              tool_call            finished
-    notification              │                    │
-         │                    ▼                    │
-         │         ┌─────────────────────┐         │
-         │         │   TOOL_PENDING      │         │
-         │         │  (awaiting tool)    │         │
-         │         └──────────┬──────────┘         │
-         │                    │                    │
-         │    ┌───────────────┼───────────────┐    │
-         │    │               │               │    │
-         │ critical      tool_done        high │    │
-         │ notif             │            notif│    │
-         │    │              ▼               │    │
-         │    │    ┌─────────────────────┐   │    │
-         │    │    │  TOOL_COMPLETE      │   │    │
-         │    │    │ (result ready)      │───┘    │
-         │    │    └──────────┬──────────┘        │
-         │    │               │                   │
-         │    │          inject with              │
-         │    │          tool result              │
-         │    │               │                   │
-         │    │               ▼                   │
-         │    │    ┌─────────────────────┐        │
-         │    └───►│   CONTINUING        │◄───────┘
-         │         │  (next iteration)   │
-         │         └──────────┬──────────┘
-         │                    │
-         │                    └──────────┐
-         │                               │
-         ▼                               ▼
-┌─────────────────────┐       ┌─────────────────────┐
-│  INTERRUPTED        │       │       IDLE          │
-│  (restart turn)     │──────►│  (turn complete)    │
-└─────────────────────┘       └─────────────────────┘
-```
-
-### Implementation Complexity
-
-| Injection Strategy | Complexity | UX Impact | Use Case |
-|-------------------|------------|-----------|----------|
-| Turn boundary | Low | None | Default, non-urgent |
-| Tool result | Medium | Minimal | High priority |
-| Before tool | Medium | Tool cancelled | Critical |
-| Mid-stream | High | Response restart | Emergency only |
-| Context window | High | Seamless | Ideal future |
-
-### Recommendation
-
-**Phase 1**: Tool result injection for High priority
-- Lowest risk
-- Natural integration point
-- Agent sees notification before next action
-
-**Phase 2**: Before-tool interruption for Critical
-- Can cancel unnecessary work
-- Restart with fresh context
-- Clear UX: "Interrupted by notification"
-
-**Phase 3**: Explore context window approach
-- Requires more architectural changes
-- Best long-term UX
-- May need API evolution
+This ensures:
+- Notifications arrive between tool calls (natural pause point)
+- Agent sees them before deciding next action
+- No interruption of streaming or tool execution
 
 ---
 
 ## Open Questions
 
-1. **Notification Persistence**
-   - Should notifications persist across sessions?
-   - Save to transcript vs. separate notification log?
-
-2. **Coalescing Strategy**
-   - How to merge rapid file changes?
-   - Window-based vs. semantic deduplication?
-
-3. **Priority Inference**
-   - Can we auto-detect priority from content?
-   - ML-based importance scoring?
-
-4. **Agent Notification Requests**
-   - Should agents be able to request notifications?
-   - "Notify me when the build finishes"
-
-5. **Notification Actions**
-   - Pre-defined actions agents can take?
-   - "Acknowledge", "Investigate", "Dismiss"?
-
-6. **Rate Limiting**
-   - Prevent notification storms
-   - Per-source rate limits?
-
-7. **Context Budget**
-   - How many notification tokens to allow?
-   - Summarization for old notifications?
-
----
-
-## Phased Implementation
-
-### Phase 1: Foundation
-- [ ] Define `Notification` types and structures
-- [ ] Add `NotificationBlock` to transcript
-- [ ] Create `NotificationManager` with basic queue
-- [ ] Add `MessageRequest::Notification` variant
-
-### Phase 2: Integration
-- [ ] Inject notifications into agent context
-- [ ] Add passive accumulation mode
-- [ ] Status bar notification indicator
-- [ ] Configuration options
-
-### Phase 3: Sources
-- [ ] File watcher notification source
-- [ ] Background task completion notifications
-- [ ] IDE diagnostic notifications
-
-### Phase 4: Activation
-- [ ] Prompt mode with UI
-- [ ] Auto-activation for high priority
-- [ ] Notification panel UI
-
-### Phase 5: Advanced
-- [ ] Coalescing and deduplication
-- [ ] Notification persistence
-- [ ] Agent-requested notifications
-- [ ] External webhook integration
-
----
-
-## Relationship to Sub-Agent Architecture
-
-The Notifications primitive complements the sub-agent work:
-
-- **Sub-agents** spawn and run to completion, returning results as tool output
-- **Notifications** signal when background work completes
-- Combined: Sub-agent spawns in background → Notification when done → Agent can discuss results
-
-```
-Primary Agent
-    │
-    ├─► spawn_background_task("run tests")
-    │       └──────────────────────► Background Runner
-    │                                       │
-    ├─► continues conversation              ├─► running...
-    │   with user                           │
-    │                                       └─► complete
-    │                                       │
-    ◄───────── Notification ────────────────┘
-    │   "Tests completed: 2 failures"
-    │
-    ├─► [Prompt mode] User sees notification
-    │   OR
-    ├─► [Active mode] Agent auto-responds
-```
-
-This creates a complete async work model where:
-1. Sub-agents handle the actual background work
-2. Notifications handle the signaling/awareness
-3. The primary conversation remains responsive
+1. **Activation modes**: Should some notifications auto-trigger agent response vs. passive accumulation?
+2. **Coalescing**: How to batch rapid file changes?
+3. **Transcript representation**: Should notifications appear as a distinct block type?
+4. **Rate limiting**: Prevent notification storms from overwhelming context?

From 9b6e521db18c40f786b4ecc4931c5d7b52d4e7b6 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 24 Jan 2026 20:16:58 +0000
Subject: [PATCH 5/7] research: Add section on handling multiple queued
 notifications

Documents options for when notifications pile up:
- Append all (simple but unbounded)
- Batch into single block (compact)
- Coalesce by source (reduces noise)
- Cap with overflow indicator (bounds tokens)

Recommends combining: coalesce + batch + cap + prioritize.
---
 research/notifications-primitive.md | 112 ++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)

diff --git a/research/notifications-primitive.md b/research/notifications-primitive.md
index 2fe3431..7649c5c 100644
--- a/research/notifications-primitive.md
+++ b/research/notifications-primitive.md
@@ -229,6 +229,118 @@ This ensures:
 
 ---
 
+## Handling Multiple Queued Notifications
+
+When several notifications arrive before a tool completes, we need a strategy for injection.
+
+### Option 1: Append All
+
+Simply append all pending notifications to the tool result:
+
+```
+ToolResult(id="edit_1"): "File updated successfully
+
+<notification source="file_watcher">
+src/lib.rs modified externally
+</notification>
+
+<notification source="file_watcher">
+src/main.rs modified externally
+</notification>
+
+<notification source="background_task">
+Build completed: 2 warnings
+</notification>"
+```
+
+**Pros**: Simple, complete information
+**Cons**: Can bloat tool results, token cost scales linearly
+
+### Option 2: Batch into Single Block
+
+Group notifications into one XML block:
+
+```
+ToolResult(id="edit_1"): "File updated successfully
+
+<notifications count="3">
+- [file_watcher] src/lib.rs modified externally
+- [file_watcher] src/main.rs modified externally
+- [background_task] Build completed: 2 warnings
+</notifications>"
+```
+
+**Pros**: Compact, clear count signals "catch up"
+**Cons**: Less structured for agent parsing
+
+### Option 3: Coalesce by Source
+
+Merge similar notifications:
+
+```
+ToolResult(id="edit_1"): "File updated successfully
+
+<notification source="file_watcher">
+Multiple files modified: src/lib.rs, src/main.rs
+</notification>
+
+<notification source="background_task">
+Build completed: 2 warnings
+</notification>"
+```
+
+**Pros**: Reduces noise from rapid file changes
+**Cons**: Loses individual event detail
+
+### Option 4: Cap with Overflow Indicator
+
+Limit injected notifications, indicate overflow:
+
+```
+ToolResult(id="edit_1"): "File updated successfully
+
+<notifications showing="3" total="7">
+- [file_watcher] src/lib.rs modified
+- [file_watcher] src/main.rs modified
+- [background_task] Build completed
+(4 more notifications pending)
+</notifications>"
+```
+
+**Pros**: Bounds token cost, agent knows there's more
+**Cons**: Agent may miss important notifications
+
+### Recommendation
+
+Combine approaches:
+
+1. **Coalesce** rapid same-source notifications (e.g., file watcher debounce)
+2. **Batch** into single `<notifications>` block with count
+3. **Cap** at reasonable limit (e.g., 5-10) with overflow indicator
+4. **Prioritize** if capping - show higher priority first
+
+```rust
+fn format_notifications(notifications: &[Notification], max: usize) -> String {
+    // Coalesce same-source notifications within time window
+    let coalesced = coalesce_by_source(notifications);
+
+    let total = coalesced.len();
+    let showing: Vec<_> = coalesced.into_iter().take(max).collect();
+
+    let mut result = format!("<notifications count=\"{}\">", total);
+    for n in &showing {
+        result.push_str(&format!("\n- [{}] {}", n.source, n.message));
+    }
+    if total > max {
+        result.push_str(&format!("\n({} more pending)", total - max));
+    }
+    result.push_str("\n</notifications>");
+    result
+}
+```
+
+---
+
 ## Open Questions
 
 1. **Activation modes**: Should some notifications auto-trigger agent response vs. passive accumulation?

From 027bb5e115d670578b6fac4c85bceb73e4dba692 Mon Sep 17 00:00:00 2001
From: Travis Dent <tcdent@gmail.com>
Date: Sat, 24 Jan 2026 20:33:13 -0800
Subject: [PATCH 6/7] Update notifications design doc with decisions

- Tool Result Augmentation approach (proven pattern from Claude Code)
- Unified notification flow based on agent state
- Simplifications: no count, no cap, no coalescing (defer)
- Ephemeral NotificationBlock for display only (not persisted)
---
 research/notifications-primitive.md | 178 ++++++++++++----------------
 1 file changed, 75 insertions(+), 103 deletions(-)

diff --git a/research/notifications-primitive.md b/research/notifications-primitive.md
index 7649c5c..52ec847 100644
--- a/research/notifications-primitive.md
+++ b/research/notifications-primitive.md
@@ -20,6 +20,70 @@ We need a way to inject **Notifications** into the agent's context without waiti
 
 ---
 
+## Decisions
+
+### Approach: Tool Result Augmentation
+
+We will use **Tool Result Augmentation** (Approach A below). This is the pattern Anthropic uses in Claude Code with `<system-reminder>` tags. It's proven, simple, and requires no API changes.
+
+### Unified Notification Flow
+
+All external events (user messages, file changes, background tasks, IDE diagnostics) use the same flow based on agent state:
+
+```
+External Event
+      │
+      ▼
+┌──────────────────┐
+│ Agent streaming? │
+└──────────────────┘
+      │
+  ┌───┴───┐
+  ▼       ▼
+ NO      YES
+  │       │
+  ▼       ▼
+Queue    Inject into next
+as new   tool result as
+message  <notification>
+```
+
+No special cases - user messages during a turn are handled the same as file watcher events or background task completions.
+
+### Simplifications
+
+- **No count**: Don't include notification counts
+- **No cap**: Include all pending notifications
+- **No coalescing**: Defer to the future if it becomes a problem
+- **Simple XML**: Just wrap in `<notification source="...">` and append
+
+### Transcript Representation
+
+`NotificationBlock` is **ephemeral** - rendered for display but not persisted:
+
+- The user sees in the UI that an interruption happened
+- Content actually lives in the tool result (which is persisted)
+- No need to reconstruct notifications when loading a saved conversation
+- Same pattern as sub-agent tool blocks (rendered but not saved)
+
+Example rendering:
+```
+┌─ shell ─────────────────────────────────
+│ cargo build
+│ ✓ Compiled successfully
+└─────────────────────────────────────────
+
+┌─ notification (user) ────────────────────
+│ actually wait, try a different approach
+└─────────────────────────────────────────
+
+┌─ edit_file ─────────────────────────────
+│ ...
+└─────────────────────────────────────────
+```
+
+---
+
 ## Key Insight: Tool Results Are Unstructured Text
 
 Tool definitions in the Anthropic API use JSON schemas for structured input:
@@ -93,11 +157,11 @@ fn submit_tool_result_with_notifications(
     } else {
         let notif_xml = notifications.iter()
             .map(|n| format!(
-                "<notification source=\"{:?}\">\n{}\n</notification>",
-                n.source, n.to_message()
+                "<notification source=\"{}\">\n{}\n</notification>",
+                n.source, n.message
             ))
             .collect::<Vec<_>>()
-            .join("\n");
+            .join("\n\n");
 
         format!("{result}\n\n{notif_xml}")
     };
@@ -193,15 +257,11 @@ Tool {
 
 ## Recommendation
 
-**Use Approach A** (Tool Result Augmentation):
-- Proven in production
+**Decision: Use Approach A** (Tool Result Augmentation):
+- Proven in production (Claude Code uses this)
 - Simplest implementation
 - No schema changes
 
-**Consider Approach B** if:
-- Notifications need distinct transcript entries
-- Agent should explicitly acknowledge notifications
-
 ---
 
 ## Injection Timing
@@ -231,11 +291,7 @@ This ensures:
 
 ## Handling Multiple Queued Notifications
 
-When several notifications arrive before a tool completes, we need a strategy for injection.
-
-### Option 1: Append All
-
-Simply append all pending notifications to the tool result:
+**Decision**: Keep it simple - append all notifications as separate XML blocks:
 
 ```
 ToolResult(id="edit_1"): "File updated successfully
@@ -253,97 +309,13 @@ Build completed: 2 warnings
 </notification>"
 ```
 
-**Pros**: Simple, complete information
-**Cons**: Can bloat tool results, token cost scales linearly
-
-### Option 2: Batch into Single Block
-
-Group notifications into one XML block:
-
-```
-ToolResult(id="edit_1"): "File updated successfully
-
-<notifications count="3">
-- [file_watcher] src/lib.rs modified externally
-- [file_watcher] src/main.rs modified externally
-- [background_task] Build completed: 2 warnings
-</notifications>"
-```
-
-**Pros**: Compact, clear count signals "catch up"
-**Cons**: Less structured for agent parsing
-
-### Option 3: Coalesce by Source
-
-Merge similar notifications:
-
-```
-ToolResult(id="edit_1"): "File updated successfully
-
-<notification source="file_watcher">
-Multiple files modified: src/lib.rs, src/main.rs
-</notification>
-
-<notification source="background_task">
-Build completed: 2 warnings
-</notification>"
-```
-
-**Pros**: Reduces noise from rapid file changes
-**Cons**: Loses individual event detail
-
-### Option 4: Cap with Overflow Indicator
-
-Limit injected notifications, indicate overflow:
-
-```
-ToolResult(id="edit_1"): "File updated successfully
-
-<notifications showing="3" total="7">
-- [file_watcher] src/lib.rs modified
-- [file_watcher] src/main.rs modified
-- [background_task] Build completed
-(4 more notifications pending)
-</notifications>"
-```
-
-**Pros**: Bounds token cost, agent knows there's more
-**Cons**: Agent may miss important notifications
-
-### Recommendation
-
-Combine approaches:
-
-1. **Coalesce** rapid same-source notifications (e.g., file watcher debounce)
-2. **Batch** into single `<notifications>` block with count
-3. **Cap** at reasonable limit (e.g., 5-10) with overflow indicator
-4. **Prioritize** if capping - show higher priority first
-
-```rust
-fn format_notifications(notifications: &[Notification], max: usize) -> String {
-    // Coalesce same-source notifications within time window
-    let coalesced = coalesce_by_source(notifications);
-
-    let total = coalesced.len();
-    let showing: Vec<_> = coalesced.into_iter().take(max).collect();
-
-    let mut result = format!("<notifications count=\"{}\">", total);
-    for n in &showing {
-        result.push_str(&format!("\n- [{}] {}", n.source, n.message));
-    }
-    if total > max {
-        result.push_str(&format!("\n({} more pending)", total - max));
-    }
-    result.push_str("\n</notifications>");
-    result
-}
-```
+No counting, no capping, no coalescing. If this becomes a problem (e.g., file watcher storms), we can add coalescing later.
 
 ---
 
 ## Open Questions
 
-1. **Activation modes**: Should some notifications auto-trigger agent response vs. passive accumulation?
-2. **Coalescing**: How to batch rapid file changes?
-3. **Transcript representation**: Should notifications appear as a distinct block type?
-4. **Rate limiting**: Prevent notification storms from overwhelming context?
+1. ~~**Activation modes**~~: Decided - unified flow based on agent state (see Decisions above)
+2. **Coalescing**: Deferred - solve if it becomes a problem
+3. ~~**Transcript representation**~~: Decided - ephemeral `NotificationBlock` (see Decisions above)
+4. **Rate limiting**: Deferred - solve if it becomes a problem

From 9bc27f7f8e23569137ab682576ff99d2dae326e7 Mon Sep 17 00:00:00 2001
From: Travis Dent <tcdent@gmail.com>
Date: Sun, 25 Jan 2026 13:13:14 -0800
Subject: [PATCH 7/7] WIP: notifications primitive - data structures and
 ephemeral blocks

- Add NotificationSource enum and Notification struct with to_xml()
- Add is_ephemeral() method to Block trait
- Add NotificationBlock for rendering notifications (ephemeral)
- Custom Serialize for Turn filters out ephemeral blocks
- Update research doc with implementation progress

Remaining: wiring queue_message() and ToolEvent::Completed handler
---
 research/notifications-primitive.md |  32 +++++++++
 src/app.rs                          |  48 +++++++++++++
 src/transcript.rs                   | 103 +++++++++++++++++++++++++++-
 3 files changed, 181 insertions(+), 2 deletions(-)

diff --git a/research/notifications-primitive.md b/research/notifications-primitive.md
index 52ec847..2e1702a 100644
--- a/research/notifications-primitive.md
+++ b/research/notifications-primitive.md
@@ -313,6 +313,38 @@ No counting, no capping, no coalescing. If this becomes a problem (e.g., file wa
 
 ---
 
+## Implementation Progress
+
+### ✅ Completed
+
+**Data Structures** (`src/app.rs` lines 65-111):
+- `NotificationSource` enum: User, FileWatcher, BackgroundTask, Ide
+- `Notification` struct with `to_xml()` method for injection format
+
+**Ephemeral Block Support** (`src/transcript.rs`):
+- Added `is_ephemeral()` method to `Block` trait (default `false`)
+- `NotificationBlock` struct whose `is_ephemeral()` returns `true`
+- Custom `Serialize` for `Turn` that filters out ephemeral blocks
+- Notification rendering with yellow styling and ⚡ icon
+
+### 🔲 Remaining
+
+**Wiring** (in `src/app.rs`):
+1. Add `pending_notifications: VecDeque<Notification>` to `App` struct
+2. Add `drain_notifications()` method to `App`
+3. Modify `queue_message()` (line ~529) - if `input_mode != Normal`, create notification instead of queuing message
+4. Modify `ToolEvent::Completed` handler (line ~1015) - drain notifications and append XML to content before calling `submit_tool_result`
+
+**System Prompt**:
+- Add explanation of `<notification>` tags (see "System Prompt Addition" section above)
+
+**Testing**:
+- Test notification injection into tool results
+- Test ephemeral block filtering during serialization
+- Test `queue_message()` behavior when streaming vs idle
+
+---
+
 ## Open Questions
 
 1. ~~**Activation modes**~~: Decided - unified flow based on agent state (see Decisions above)
diff --git a/src/app.rs b/src/app.rs
index aa2bfeb..db97b16 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -62,6 +62,54 @@ enum MessageRequest {
     Command(String, usize),
 }
 
+/// Origin of a [`Notification`]; its `Display` form is the `source="..."` value in the XML.
+#[derive(Debug, Clone)]
+pub enum NotificationSource {
+    /// The user sent a message while the agent was streaming.
+    User,
+    /// A file was modified externally.
+    FileWatcher,
+    /// A background task completed.
+    BackgroundTask,
+    /// An IDE event (diagnostics, etc.).
+    Ide,
+}
+
+impl std::fmt::Display for NotificationSource {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self { // one lowercase snake_case tag per variant
+            NotificationSource::User => write!(f, "user"),
+            NotificationSource::FileWatcher => write!(f, "file_watcher"),
+            NotificationSource::BackgroundTask => write!(f, "background_task"),
+            NotificationSource::Ide => write!(f, "ide"),
+        }
+    }
+}
+
+/// An external event to be injected into a tool result (rendered via `to_xml`).
+#[derive(Debug, Clone)]
+pub struct Notification {
+    pub source: NotificationSource, // where the event came from
+    pub message: String, // body text; `to_xml` emits it verbatim
+}
+
+impl Notification {
+    pub fn new(source: NotificationSource, message: impl Into<String>) -> Self { // `message` may be `&str` or `String`
+        Self {
+            source,
+            message: message.into(),
+        }
+    }
+
+    /// Render as a `<notification source="...">` block; `message` is inserted verbatim (no XML escaping).
+    pub fn to_xml(&self) -> String {
+        format!(
+            "<notification source=\"{}\">\n{}\n</notification>",
+            self.source, self.message
+        )
+    }
+}
+
 /// Actions that can be triggered by terminal events
 #[derive(Debug, Clone, PartialEq, Eq)]
 enum Action {
diff --git a/src/transcript.rs b/src/transcript.rs
index 8fc2576..54adbd7 100644
--- a/src/transcript.rs
+++ b/src/transcript.rs
@@ -11,7 +11,7 @@ use ratatui::{
     style::{Color, Modifier, Style},
     text::{Line, Span},
 };
-use serde::{Deserialize, Serialize};
+use serde::{Deserialize, Serialize, ser::SerializeStruct};
 
 #[cfg(feature = "cli")]
 use crate::compaction::CompactionBlock;
@@ -96,6 +96,10 @@ pub trait Block: Send + Sync {
     /// Set the status of this block
     fn set_status(&mut self, status: Status);
 
+    /// Whether this block is ephemeral, i.e. rendered but not persisted (defaults to `false`).
+    /// Ephemeral blocks are filtered out when a `Turn` is serialized.
+    fn is_ephemeral(&self) -> bool { false }
+
     /// Render status icon with appropriate color (CLI only)
     #[cfg(feature = "cli")]
     fn render_status(&self) -> Span<'static> {
@@ -345,6 +349,80 @@ impl Block for ToolBlock {
     }
 }
 
+/// Transcript block that displays a notification injected mid-turn.
+/// These are ephemeral: rendered, but not persisted to the transcript.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct NotificationBlock {
+    pub source: String, // label shown in the rendered header
+    pub message: String, // notification body; wrapped to the available width when rendered
+    #[serde(skip_deserializing, default = "NotificationBlock::default_status")]
+    status: Status, // never read from saved data; falls back to `default_status()`
+}
+
+impl NotificationBlock {
+    pub fn new(source: impl Into<String>, message: impl Into<String>) -> Self {
+        Self {
+            source: source.into(),
+            message: message.into(),
+            status: Status::Complete, // a new notification block starts out complete
+        }
+    }
+    
+    fn default_status() -> Status { // serde default for the `status` field
+        Status::Complete
+    }
+}
+
+#[typetag::serde]
+impl Block for NotificationBlock {
+    fn kind(&self) -> BlockType {
+        BlockType::Text  // Treat as text for streaming purposes
+    }
+
+    fn status(&self) -> Status {
+        self.status
+    }
+
+    fn set_status(&mut self, status: Status) {
+        self.status = status;
+    }
+
+    fn is_ephemeral(&self) -> bool {
+        true  // Overrides the trait default (`false`): this block is never persisted
+    }
+
+    #[cfg(feature = "cli")]
+    fn render(&self, width: u16) -> Vec<Line<'_>> {
+        let mut lines = Vec::new();
+        
+        // Header line in yellow: "⚡ notification (<source>)", with the label in bold
+        lines.push(Line::from(vec![
+            Span::styled("⚡ ", Style::default().fg(Color::Yellow)),
+            Span::styled(
+                format!("notification ({})", self.source),
+                Style::default()
+                    .fg(Color::Yellow)
+                    .add_modifier(Modifier::BOLD),
+            ),
+        ]));
+        
+        // Message body, wrapped to `width - 2` to leave room for the two-space indent
+        let wrapped = textwrap::wrap(&self.message, width.saturating_sub(2) as usize);
+        for line in wrapped {
+            lines.push(Line::from(Span::styled(
+                format!("  {}", line),
+                Style::default().fg(Color::Yellow),
+            )));
+        }
+        
+        lines
+    }
+
+    fn text(&self) -> Option<&str> { // plain-text view is the message body
+        Some(&self.message)
+    }
+}
+
 /// Helper: render prefix for background tools - "[bg] " if true, empty otherwise
 #[cfg(feature = "cli")]
 pub fn render_prefix(background: bool) -> Span<'static> {
@@ -407,7 +485,7 @@ pub fn render_result(result: &str, max_lines: usize) -> Vec<Line<'static>> {
 }
 
 /// A turn in the conversation - one user or assistant response
-#[derive(Serialize, Deserialize)]
+#[derive(Deserialize)]
 pub struct Turn {
     pub id: usize,
     pub role: Role,
@@ -418,6 +496,27 @@ pub struct Turn {
     pub active_block_idx: Option<usize>,
 }
 
+/// Custom serialization: writes `id`, `role`, `content`, `timestamp`; ephemeral blocks are dropped from `content`.
+impl Serialize for Turn {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        // Keep only the blocks that should be persisted (borrowed, not cloned)
+        let persistent_content: Vec<&Box<dyn Block>> = self.content
+            .iter()
+            .filter(|b| !b.is_ephemeral())
+            .collect();
+        
+        let mut state = serializer.serialize_struct("Turn", 4)?; // 4 = fields written below; `active_block_idx` is not emitted
+        state.serialize_field("id", &self.id)?;
+        state.serialize_field("role", &self.role)?;
+        state.serialize_field("content", &persistent_content)?;
+        state.serialize_field("timestamp", &self.timestamp)?;
+        state.end()
+    }
+}
+
 impl Turn {
     pub fn new(id: usize, role: Role, content: Vec<Box<dyn Block>>) -> Self {
         Self {