From 102eaa9a7e27a5ba703872fd8e89057e8f801d84 Mon Sep 17 00:00:00 2001 From: Corey McCallum Date: Tue, 3 Feb 2026 14:16:23 -0800 Subject: [PATCH 01/10] feat(memory): add short-term memory system Adds deterministic short-term memory with three storage mechanisms: - Auto-store from tool responses via memory_hint field - Explicit memory_short tool (store/get/delete/list actions) - HTTP API endpoints for external access Backend: src/caal/memory/ package with file-based JSON persistence, singleton pattern, TTL support, and context injection into LLM. Frontend: Memory Panel UI with Brain icon button, entry list, detail modal, and clear all functionality. Includes i18n translations for en, fr, it. Co-Authored-By: Claude Opus 4.5 --- frontend/app/api/memory/[key]/route.ts | 63 ++++ frontend/app/api/memory/route.ts | 87 ++++++ frontend/components/app/view-controller.tsx | 6 + frontend/components/app/welcome-view.tsx | 13 +- frontend/components/memory/index.ts | 1 + frontend/components/memory/memory-panel.tsx | 309 ++++++++++++++++++++ frontend/messages/en.json | 24 ++ frontend/messages/fr.json | 24 ++ frontend/messages/it.json | 24 ++ src/caal/integrations/__init__.py | 2 + src/caal/integrations/memory_tool.py | 113 +++++++ src/caal/llm/llm_node.py | 34 ++- src/caal/memory/__init__.py | 34 +++ src/caal/memory/base.py | 51 ++++ src/caal/memory/short_term.py | 294 +++++++++++++++++++ src/caal/webhooks.py | 168 +++++++++++ voice_agent.py | 20 +- 17 files changed, 1261 insertions(+), 6 deletions(-) create mode 100644 frontend/app/api/memory/[key]/route.ts create mode 100644 frontend/app/api/memory/route.ts create mode 100644 frontend/components/memory/index.ts create mode 100644 frontend/components/memory/memory-panel.tsx create mode 100644 src/caal/integrations/memory_tool.py create mode 100644 src/caal/memory/__init__.py create mode 100644 src/caal/memory/base.py create mode 100644 src/caal/memory/short_term.py diff --git a/frontend/app/api/memory/[key]/route.ts 
b/frontend/app/api/memory/[key]/route.ts new file mode 100644 index 0000000..27b11ed --- /dev/null +++ b/frontend/app/api/memory/[key]/route.ts @@ -0,0 +1,63 @@ +import { NextRequest, NextResponse } from 'next/server'; + +const WEBHOOK_URL = process.env.WEBHOOK_URL || 'http://agent:8889'; + +interface RouteParams { + params: Promise<{ key: string }>; +} + +/** + * GET /api/memory/[key] - Get a single memory entry + */ +export async function GET(_request: NextRequest, { params }: RouteParams) { + try { + const { key } = await params; + const res = await fetch(`${WEBHOOK_URL}/memory/${encodeURIComponent(key)}`, { + method: 'GET', + headers: { 'Content-Type': 'application/json' }, + }); + + if (!res.ok) { + const text = await res.text(); + console.error(`[/api/memory/${key}] Backend error:`, res.status, text); + return NextResponse.json({ error: text || 'Backend error' }, { status: res.status }); + } + + const data = await res.json(); + return NextResponse.json(data); + } catch (error) { + console.error('[/api/memory/[key]] Error:', error); + return NextResponse.json( + { error: error instanceof Error ? error.message : 'Unknown error' }, + { status: 500 } + ); + } +} + +/** + * DELETE /api/memory/[key] - Delete a single memory entry + */ +export async function DELETE(_request: NextRequest, { params }: RouteParams) { + try { + const { key } = await params; + const res = await fetch(`${WEBHOOK_URL}/memory/${encodeURIComponent(key)}`, { + method: 'DELETE', + headers: { 'Content-Type': 'application/json' }, + }); + + if (!res.ok) { + const text = await res.text(); + console.error(`[/api/memory/${key}] Backend error:`, res.status, text); + return NextResponse.json({ error: text || 'Backend error' }, { status: res.status }); + } + + const data = await res.json(); + return NextResponse.json(data); + } catch (error) { + console.error('[/api/memory/[key]] Error:', error); + return NextResponse.json( + { error: error instanceof Error ? 
error.message : 'Unknown error' }, + { status: 500 } + ); + } +} diff --git a/frontend/app/api/memory/route.ts b/frontend/app/api/memory/route.ts new file mode 100644 index 0000000..ab43e22 --- /dev/null +++ b/frontend/app/api/memory/route.ts @@ -0,0 +1,87 @@ +import { NextRequest, NextResponse } from 'next/server'; + +const WEBHOOK_URL = process.env.WEBHOOK_URL || 'http://agent:8889'; + +/** + * GET /api/memory - List all memory entries + */ +export async function GET() { + try { + const res = await fetch(`${WEBHOOK_URL}/memory`, { + method: 'GET', + headers: { 'Content-Type': 'application/json' }, + }); + + if (!res.ok) { + const text = await res.text(); + console.error('[/api/memory] Backend error:', res.status, text); + return NextResponse.json({ error: text || 'Backend error' }, { status: res.status }); + } + + const data = await res.json(); + return NextResponse.json(data); + } catch (error) { + console.error('[/api/memory] Error:', error); + return NextResponse.json( + { error: error instanceof Error ? error.message : 'Unknown error' }, + { status: 500 } + ); + } +} + +/** + * POST /api/memory - Store a new memory entry + */ +export async function POST(request: NextRequest) { + try { + const body = await request.json(); + + const res = await fetch(`${WEBHOOK_URL}/memory`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + }); + + if (!res.ok) { + const text = await res.text(); + console.error('[/api/memory] Backend error:', res.status, text); + return NextResponse.json({ error: text || 'Backend error' }, { status: res.status }); + } + + const data = await res.json(); + return NextResponse.json(data); + } catch (error) { + console.error('[/api/memory] Error:', error); + return NextResponse.json( + { error: error instanceof Error ? 
error.message : 'Unknown error' }, + { status: 500 } + ); + } +} + +/** + * DELETE /api/memory - Clear all memory entries + */ +export async function DELETE() { + try { + const res = await fetch(`${WEBHOOK_URL}/memory`, { + method: 'DELETE', + headers: { 'Content-Type': 'application/json' }, + }); + + if (!res.ok) { + const text = await res.text(); + console.error('[/api/memory] Backend error:', res.status, text); + return NextResponse.json({ error: text || 'Backend error' }, { status: res.status }); + } + + const data = await res.json(); + return NextResponse.json(data); + } catch (error) { + console.error('[/api/memory] Error:', error); + return NextResponse.json( + { error: error instanceof Error ? error.message : 'Unknown error' }, + { status: 500 } + ); + } +} diff --git a/frontend/components/app/view-controller.tsx b/frontend/components/app/view-controller.tsx index 655f13b..c419fa9 100644 --- a/frontend/components/app/view-controller.tsx +++ b/frontend/components/app/view-controller.tsx @@ -6,6 +6,7 @@ import { useSessionContext } from '@livekit/components-react'; import type { AppConfig } from '@/app-config'; import { SessionView } from '@/components/app/session-view'; import { WelcomeView } from '@/components/app/welcome-view'; +import { MemoryPanel } from '@/components/memory'; import { SettingsPanel } from '@/components/settings/settings-panel'; import { ToolsPanel } from '@/components/tools'; @@ -38,6 +39,7 @@ export function ViewController({ appConfig }: ViewControllerProps) { const { isConnected, start } = useSessionContext(); const [settingsOpen, setSettingsOpen] = useState(false); const [toolsOpen, setToolsOpen] = useState(false); + const [memoryOpen, setMemoryOpen] = useState(false); return ( <> @@ -50,6 +52,7 @@ export function ViewController({ appConfig }: ViewControllerProps) { onStartCall={start} onOpenSettings={() => setSettingsOpen(true)} onOpenTools={() => setToolsOpen(true)} + onOpenMemory={() => setMemoryOpen(true)} /> )} {/* Session view 
*/} @@ -63,6 +66,9 @@ export function ViewController({ appConfig }: ViewControllerProps) { {/* Tools panel */} setToolsOpen(false)} /> + + {/* Memory panel */} + setMemoryOpen(false)} /> ); } diff --git a/frontend/components/app/welcome-view.tsx b/frontend/components/app/welcome-view.tsx index 9778b3f..7cb4b45 100644 --- a/frontend/components/app/welcome-view.tsx +++ b/frontend/components/app/welcome-view.tsx @@ -1,7 +1,7 @@ 'use client'; import { useTranslations } from 'next-intl'; -import { Gear, Wrench } from '@phosphor-icons/react/dist/ssr'; +import { Brain, Gear, Wrench } from '@phosphor-icons/react/dist/ssr'; import { Button } from '@/components/livekit/button'; function WelcomeImage() { @@ -26,12 +26,14 @@ interface WelcomeViewProps { onStartCall: () => void; onOpenSettings?: () => void; onOpenTools?: () => void; + onOpenMemory?: () => void; } export const WelcomeView = ({ onStartCall, onOpenSettings, onOpenTools, + onOpenMemory, ref, }: React.ComponentProps<'div'> & WelcomeViewProps) => { const t = useTranslations('Welcome'); @@ -41,6 +43,15 @@ export const WelcomeView = ({
{/* Top right buttons */}
+ {onOpenMemory && ( + + )} {onOpenTools && ( +
+ + + {/* Content */} +
+ {loading ? ( +
{tCommon('loading')}
+ ) : entries.length === 0 ? ( +
+ +

{t('panel.emptyState')}

+

{t('panel.emptyHint')}

+
+ ) : ( +
+ {entries.map((entry) => ( +
setSelectedEntry(entry)} + > +
+
+ {entry.key} + +
+

+ {formatValue(entry.value)} +

+
+ {formatTimeAgo(entry.stored_at)} + {entry.expires_at && ( + + + {formatExpiry(entry.expires_at)} + + )} +
+
+ +
+ ))} +
+ )} +
+ + {/* Footer */} +
+ +
+
+ + {/* Detail modal */} + {selectedEntry && ( +
+
setSelectedEntry(null)} /> +
+ + +
+

{selectedEntry.key}

+ +
+ +
+ +
+                {typeof selectedEntry.value === 'string'
+                  ? selectedEntry.value
+                  : JSON.stringify(selectedEntry.value, null, 2)}
+              
+
+ +
+
+ + {new Date(selectedEntry.stored_at * 1000).toLocaleString()} +
+
+ + + {selectedEntry.expires_at + ? new Date(selectedEntry.expires_at * 1000).toLocaleString() + : t('detail.noExpiry')} + +
+
+ +
+ + +
+
+
+ )} +
, + document.body + ); +} diff --git a/frontend/messages/en.json b/frontend/messages/en.json index 55e513e..e93280e 100644 --- a/frontend/messages/en.json +++ b/frontend/messages/en.json @@ -8,11 +8,13 @@ "continue": "Continue", "cancel": "Cancel", "close": "Close", + "delete": "Delete", "retry": "Retry", "enabled": "Enabled", "disabled": "Disabled", "settings": "Settings", "tools": "Tools", + "memory": "Memory", "connected": "Connected" }, "Welcome": { @@ -221,6 +223,28 @@ "parametersTitle": "Tool Parameters" } }, + "Memory": { + "panel": { + "title": "Memory", + "subtitle": "{count} entries stored", + "emptyState": "No memories stored", + "emptyHint": "Memories are created when tools return data or when you ask me to remember something", + "clearAll": "Clear All" + }, + "confirmClearAll": "Clear all memory entries?", + "detail": { + "value": "Value", + "storedAt": "Stored", + "expiresAt": "Expires", + "noExpiry": "No expiry", + "source": "Source", + "sources": { + "tool_hint": "Auto-stored from tool", + "explicit": "Stored by request", + "api": "Stored via API" + } + } + }, "Setup": { "welcome": "Welcome to CAAL", "stepOf": "Step {current} of {total}", diff --git a/frontend/messages/fr.json b/frontend/messages/fr.json index b31bfc2..7bd7a71 100644 --- a/frontend/messages/fr.json +++ b/frontend/messages/fr.json @@ -8,11 +8,13 @@ "continue": "Continuer", "cancel": "Annuler", "close": "Fermer", + "delete": "Supprimer", "retry": "Réessayer", "enabled": "Actif", "disabled": "Inactif", "settings": "Paramètres", "tools": "Outils", + "memory": "Mémoire", "connected": "Connecté" }, "Welcome": { @@ -221,6 +223,28 @@ "parametersTitle": "Paramètres de l'outil" } }, + "Memory": { + "panel": { + "title": "Mémoire", + "subtitle": "{count} entrées stockées", + "emptyState": "Aucune mémoire stockée", + "emptyHint": "Les mémoires sont créées lorsque les outils renvoient des données ou lorsque vous me demandez de mémoriser quelque chose", + "clearAll": "Tout effacer" + }, + 
"confirmClearAll": "Effacer toutes les entrées de mémoire ?", + "detail": { + "value": "Valeur", + "storedAt": "Stocké", + "expiresAt": "Expire", + "noExpiry": "Pas d'expiration", + "source": "Source", + "sources": { + "tool_hint": "Stocké automatiquement par l'outil", + "explicit": "Stocké sur demande", + "api": "Stocké via API" + } + } + }, "Setup": { "welcome": "Bienvenue sur CAAL", "stepOf": "Étape {current} sur {total}", diff --git a/frontend/messages/it.json b/frontend/messages/it.json index d426444..d7f2005 100644 --- a/frontend/messages/it.json +++ b/frontend/messages/it.json @@ -8,11 +8,13 @@ "continue": "Continua", "cancel": "Annulla", "close": "Chiudi", + "delete": "Elimina", "retry": "Riprova", "enabled": "Attivo", "disabled": "Disattivo", "settings": "Impostazioni", "tools": "Strumenti", + "memory": "Memoria", "connected": "Connesso" }, "Welcome": { @@ -221,6 +223,28 @@ "parametersTitle": "Parametri dello strumento" } }, + "Memory": { + "panel": { + "title": "Memoria", + "subtitle": "{count} voci memorizzate", + "emptyState": "Nessuna memoria memorizzata", + "emptyHint": "Le memorie vengono create quando gli strumenti restituiscono dati o quando mi chiedi di ricordare qualcosa", + "clearAll": "Cancella tutto" + }, + "confirmClearAll": "Cancellare tutte le voci di memoria?", + "detail": { + "value": "Valore", + "storedAt": "Memorizzato", + "expiresAt": "Scade", + "noExpiry": "Nessuna scadenza", + "source": "Fonte", + "sources": { + "tool_hint": "Memorizzato automaticamente dallo strumento", + "explicit": "Memorizzato su richiesta", + "api": "Memorizzato tramite API" + } + } + }, "Setup": { "welcome": "Benvenuto su CAAL", "stepOf": "Passaggio {current} di {total}", diff --git a/src/caal/integrations/__init__.py b/src/caal/integrations/__init__.py index 9134700..b50b827 100644 --- a/src/caal/integrations/__init__.py +++ b/src/caal/integrations/__init__.py @@ -3,6 +3,7 @@ """ from .mcp_loader import MCPServerConfig, initialize_mcp_servers, load_mcp_config 
"""Short-term memory tool for CAAL.

Provides explicit memory operations for the LLM to store and retrieve data.
This is mechanism #2 of the three storage mechanisms:
  1. Auto-store: Tool responses with memory_hint field (automatic)
  2. Explicit: memory_short tool (this file) - user says "remember X"
  3. Passive: Tool descriptions instruct LLM to check memory first

Usage:
    class VoiceAssistant(MemoryTools, Agent):
        pass  # memory_short tool is automatically available
"""

import json
import logging
from typing import TYPE_CHECKING

from livekit.agents import function_tool

if TYPE_CHECKING:
    from ..memory import ShortTermMemory

logger = logging.getLogger(__name__)


class MemoryTools:
    """Mixin providing the memory_short tool for explicit memory operations.

    Requires the concrete agent class to provide:
        self._short_term_memory: ShortTermMemory instance

    When the attribute is absent the tool degrades gracefully and reports
    "Memory not available" instead of raising.
    """

    @function_tool
    async def memory_short(
        self,
        action: str,
        key: str = "",
        value: str = "",
    ) -> str:
        """Store or retrieve information for later use in this conversation.

        Use this to remember things the user tells you like flight numbers,
        tracking codes, preferences, or any data you'll need to reference later.

        IMPORTANT: Before asking the user for information you've already
        discussed (like a tracking number or flight), check memory first
        with action="get" or action="list".

        Args:
            action: One of "store", "get", "delete", "list"
            key: The key to store/get/delete (e.g., "flight_number", "tracking_code")
            value: The value to store (only required for action="store")

        Returns:
            Result of the operation
        """
        # Annotated as Optional: the mixin cannot guarantee the attribute was set.
        memory: "ShortTermMemory | None" = getattr(self, "_short_term_memory", None)

        if memory is None:
            logger.warning("memory_short called but no memory instance available")
            return "Memory not available"

        # Normalize so LLM-generated variants like "Store" or " get " still work.
        action = action.strip().lower()
        logger.info(f"memory_short: action={action}, key={key}")

        if action == "store":
            return self._memory_store(memory, key, value)
        if action == "get":
            return self._memory_get(memory, key)
        if action == "delete":
            return self._memory_delete(memory, key)
        if action == "list":
            return self._memory_list(memory)
        return f"Unknown action: {action}. Valid actions: store, get, delete, list"

    @staticmethod
    def _memory_store(memory: "ShortTermMemory", key: str, value: str) -> str:
        """Handle action="store": persist value under key, JSON-decoding if possible."""
        if not key:
            return "Key is required for store action"
        if not value:
            return "Value is required for store action"

        # Store structured data when the LLM passed serialized JSON; the
        # strip() tolerates leading whitespace before '{' or '['.
        parsed_value: object = value
        if value.strip().startswith(("{", "[")):
            try:
                parsed_value = json.loads(value)
            except json.JSONDecodeError:
                parsed_value = value  # keep the raw string on malformed JSON

        memory.store(key=key, value=parsed_value, source="explicit")
        return f"Stored: {key}"

    @staticmethod
    def _memory_get(memory: "ShortTermMemory", key: str) -> str:
        """Handle action="get": return the stored value rendered as a string."""
        if not key:
            return "Key is required for get action"

        result = memory.get(key)
        if result is None:
            return f"No value found for key: {key}"
        if isinstance(result, (dict, list)):
            return json.dumps(result)
        return str(result)

    @staticmethod
    def _memory_delete(memory: "ShortTermMemory", key: str) -> str:
        """Handle action="delete": remove the key if present."""
        if not key:
            return "Key is required for delete action"

        deleted = memory.delete(key)
        return f"Deleted: {key}" if deleted else f"Key not found: {key}"

    @staticmethod
    def _memory_list(memory: "ShortTermMemory") -> str:
        """Handle action="list": enumerate stored keys with their sources."""
        entries = memory.list_keys()
        if not entries:
            return "Memory is empty"

        lines = ["Stored memory keys:"]
        for entry in entries:
            lines.append(f"- {entry['key']} (source: {entry['source']})")
        return "\n".join(lines)
System prompt (always first, never trimmed) 2. Tool data context (injected from cache) - 3. Chat history (sliding window applied) + 3. Short-term memory context (persistent facts) + 4. Chat history (sliding window applied) Args: chat_ctx: LiveKit chat context tool_data_cache: Cache of recent tool response data + short_term_memory: Short-term memory for persistent context max_turns: Max conversation turns to keep (1 turn = user + assistant) """ system_prompt = None @@ -294,7 +302,13 @@ def _build_messages_from_context( if context: messages.append({"role": "system", "content": context}) - # 3. Apply sliding window to chat history + # 3. Inject short-term memory context + if short_term_memory: + memory_context = short_term_memory.get_context_message() + if memory_context: + messages.append({"role": "system", "content": memory_context}) + + # 4. Apply sliding window to chat history # max_turns * 2 accounts for user + assistant pairs max_messages = max_turns * 2 if len(chat_messages) > max_messages: @@ -441,6 +455,7 @@ async def _execute_tool_calls( response_content: str | None, provider: LLMProvider, tool_data_cache: ToolDataCache | None = None, + short_term_memory: ShortTermMemory | None = None, ) -> list[dict]: """Execute tool calls and append results to messages. 
@@ -451,6 +466,7 @@ async def _execute_tool_calls( response_content: Original LLM response content (if any) provider: LLM provider (for formatting tool results) tool_data_cache: Optional cache to store structured tool response data + short_term_memory: Optional memory to store memory_hint from tool responses """ # Add assistant message with tool calls tool_call_message = provider.format_tool_call_message( @@ -468,6 +484,18 @@ async def _execute_tool_calls( try: tool_result = await _execute_single_tool(agent, tool_name, arguments) + # Extract and store memory_hint from tool response (deterministic) + if short_term_memory and isinstance(tool_result, dict): + memory_hint = tool_result.get("memory_hint") + if memory_hint and isinstance(memory_hint, dict): + for key, value in memory_hint.items(): + short_term_memory.store( + key=key, + value=value, + source="tool_hint", + ) + logger.info(f"Stored memory hint from {tool_name}: {key}") + # Cache structured data if present if tool_data_cache and isinstance(tool_result, dict): # Look for common data fields, otherwise cache the whole result diff --git a/src/caal/memory/__init__.py b/src/caal/memory/__init__.py new file mode 100644 index 0000000..f825d57 --- /dev/null +++ b/src/caal/memory/__init__.py @@ -0,0 +1,34 @@ +"""CAAL Memory System. 
"""Base types and constants for CAAL memory systems.

Shared by the short-term store and any future long-term implementation so
both layers of the memory subsystem expose a consistent interface.

Memory Architecture:
    Short-term: Session/task context, JSON key-value, TTL-based expiry
    Long-term (future): Knowledge graph, embeddings, semantic search
"""

from __future__ import annotations

import os
from pathlib import Path
from typing import Any, Literal, TypedDict

# Where memory files live. Defaults to the project root (four levels up from
# src/caal/memory/base.py, same directory as settings.json), overridable via
# the CAAL_MEMORY_DIR environment variable.
_PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
MEMORY_DIR = Path(os.getenv("CAAL_MEMORY_DIR", _PROJECT_ROOT))

# Auto-stored entries expire after 24 hours unless a TTL is given explicitly.
DEFAULT_TTL_SECONDS = 86400

# Provenance of a memory entry: tool memory_hint auto-store, explicit
# memory_short tool call, or the HTTP API.
MemorySource = Literal["tool_hint", "explicit", "api"]


class MemoryEntry(TypedDict):
    """One stored value plus its bookkeeping metadata.

    Attributes:
        value: The stored data (any JSON-serializable type)
        stored_at: Unix timestamp when the entry was created
        expires_at: Unix timestamp for expiry, or None for no expiry
        source: How the entry was created (tool_hint, explicit, api)
    """

    value: Any
    stored_at: float
    expires_at: float | None
    source: MemorySource


class MemoryStore(TypedDict):
    """Top-level shape of the persisted memory store.

    Attributes:
        entries: Mapping of key -> MemoryEntry
    """

    entries: dict[str, MemoryEntry]
"""Short-term memory for CAAL voice agent.

Provides session-persistent memory storage with optional TTL-based expiry.
Memory survives agent restarts via file-based JSON persistence.

Three storage mechanisms feed into this:
    1. Auto-store: Tool responses with memory_hint field
    2. Explicit: memory_short tool with store/get/delete/list actions
    3. API: HTTP endpoints for external systems

Example:
    >>> from caal.memory import ShortTermMemory
    >>> memory = ShortTermMemory()
    >>> memory.store("flight_number", "UA1234", source="explicit")
    >>> memory.get("flight_number")
    'UA1234'
"""

from __future__ import annotations

import json
import logging
import threading
import time
from typing import Any

from .base import (
    DEFAULT_TTL_SECONDS,
    MEMORY_DIR,
    MemoryEntry,
    MemorySource,
    MemoryStore,
)

logger = logging.getLogger(__name__)

# File path for short-term memory persistence
SHORT_TERM_MEMORY_PATH = MEMORY_DIR / "short_term_memory.json"


class ShortTermMemory:
    """Global singleton for short-term memory.

    File-backed storage with an in-memory cache. All public operations are
    serialized through a re-entrant class lock, so the shared singleton is
    safe to use from both the agent and the webhook server thread.
    Expired entries are cleaned up lazily on access.

    Attributes:
        _instance: Singleton instance (class-level)
        _cache: In-memory cache of the memory store
        _lock: Re-entrant lock guarding cache and file access
    """

    _instance: ShortTermMemory | None = None
    _cache: MemoryStore | None = None
    # RLock (not Lock) because public methods call each other while holding
    # the lock: get() -> delete(), list_keys() -> cleanup_expired(), etc.
    _lock = threading.RLock()

    def __new__(cls) -> ShortTermMemory:
        """Singleton pattern - returns existing instance or creates new one."""
        with cls._lock:
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                cls._instance._load()
            return cls._instance

    def store(
        self,
        key: str,
        value: Any,
        ttl_seconds: int | None = DEFAULT_TTL_SECONDS,
        source: MemorySource = "explicit",
    ) -> None:
        """Store a value in memory.

        Args:
            key: Descriptive key (e.g., "flight_number", "tracking_code")
            value: Any JSON-serializable data to store
            ttl_seconds: Time-to-live in seconds. None for no expiry.
            source: Origin of data ("tool_hint", "explicit", "api")
        """
        now = time.time()
        expires_at = now + ttl_seconds if ttl_seconds is not None else None

        entry: MemoryEntry = {
            "value": value,
            "stored_at": now,
            "expires_at": expires_at,
            "source": source,
        }

        with self._lock:
            self._ensure_cache()
            self._cache["entries"][key] = entry
            self._save()

        ttl_str = f"ttl={ttl_seconds}s" if ttl_seconds else "no expiry"
        logger.info(f"Stored memory: {key} (source={source}, {ttl_str})")

    def get(self, key: str) -> Any | None:
        """Get a value from memory.

        Automatically removes the entry if it has expired.

        Args:
            key: The memory key to retrieve

        Returns:
            The stored value, or None if not found or expired
        """
        with self._lock:
            self._ensure_cache()
            entry = self._cache["entries"].get(key)

            if entry is None:
                return None

            # Expired entries are deleted on read rather than by a timer.
            if entry["expires_at"] is not None and time.time() > entry["expires_at"]:
                self.delete(key)
                logger.debug(f"Memory expired: {key}")
                return None

            return entry["value"]

    def delete(self, key: str) -> bool:
        """Delete a key from memory.

        Args:
            key: The memory key to delete

        Returns:
            True if key existed and was deleted, False otherwise
        """
        with self._lock:
            self._ensure_cache()

            if key in self._cache["entries"]:
                del self._cache["entries"][key]
                self._save()
                logger.info(f"Deleted memory: {key}")
                return True

            return False

    def list_keys(self) -> list[dict[str, Any]]:
        """List all non-expired memory keys with metadata.

        Automatically cleans up expired entries.

        Returns:
            List of dicts with key, stored_at, expires_at, source
        """
        with self._lock:
            self._ensure_cache()
            self.cleanup_expired()

            return [
                {
                    "key": key,
                    "stored_at": entry["stored_at"],
                    "expires_at": entry["expires_at"],
                    "source": entry["source"],
                }
                for key, entry in self._cache["entries"].items()
            ]

    def get_all(self) -> list[dict[str, Any]]:
        """Get all non-expired memory entries with full data.

        Returns:
            List of dicts with key, value, stored_at, expires_at, source
        """
        with self._lock:
            self._ensure_cache()
            self.cleanup_expired()

            return [
                {
                    "key": key,
                    "value": entry["value"],
                    "stored_at": entry["stored_at"],
                    "expires_at": entry["expires_at"],
                    "source": entry["source"],
                }
                for key, entry in self._cache["entries"].items()
            ]

    def get_context_message(self) -> str | None:
        """Format memory as context string for LLM injection.

        Returns:
            Formatted string for system message, or None if empty
        """
        with self._lock:
            self._ensure_cache()
            self.cleanup_expired()

            if not self._cache["entries"]:
                return None

            now = time.time()
            lines = [
                "Short-term memory (check before asking user for previously-mentioned info):"
            ]

            for key, entry in self._cache["entries"].items():
                value = entry["value"]
                source = entry["source"]

                # Render structured values as JSON, truncated to keep the
                # injected context small.
                if isinstance(value, (dict, list)):
                    value_str = json.dumps(value)
                else:
                    value_str = str(value)

                if len(value_str) > 100:
                    value_str = value_str[:97] + "..."

                # Human-readable remaining TTL (hours/minutes/seconds).
                if entry["expires_at"] is not None:
                    remaining = entry["expires_at"] - now
                    if remaining > 3600:
                        expiry_str = f"{remaining / 3600:.1f}h"
                    elif remaining > 60:
                        expiry_str = f"{remaining / 60:.0f}m"
                    else:
                        expiry_str = f"{remaining:.0f}s"
                    expiry_info = f", expires: {expiry_str}"
                else:
                    expiry_info = ", no expiry"

                lines.append(f"- {key}: {value_str} (source: {source}{expiry_info})")

            return "\n".join(lines)

    def clear(self) -> None:
        """Clear all memory entries."""
        with self._lock:
            self._cache = {"entries": {}}
            self._save()
        logger.info("Cleared all short-term memory")

    def cleanup_expired(self) -> int:
        """Remove all expired entries.

        Returns:
            Number of entries cleaned up
        """
        with self._lock:
            self._ensure_cache()
            now = time.time()
            expired_keys = [
                key
                for key, entry in self._cache["entries"].items()
                if entry["expires_at"] is not None and now > entry["expires_at"]
            ]

            for key in expired_keys:
                del self._cache["entries"][key]

            if expired_keys:
                self._save()
                logger.info(f"Cleaned up {len(expired_keys)} expired memory entries")

            return len(expired_keys)

    def _ensure_cache(self) -> None:
        """Ensure cache is loaded. Callers must hold the lock."""
        if self._cache is None:
            self._load()

    def _load(self) -> None:
        """Load memory from file. Callers must hold the lock."""
        with self._lock:
            if SHORT_TERM_MEMORY_PATH.exists():
                try:
                    with open(SHORT_TERM_MEMORY_PATH) as f:
                        data = json.load(f)
                    self._cache = {"entries": data.get("entries", {})}
                    logger.debug(f"Loaded short-term memory from {SHORT_TERM_MEMORY_PATH}")
                    return
                except Exception as e:
                    # A corrupt file falls through to an empty store rather
                    # than crashing the agent.
                    logger.warning(f"Failed to load short-term memory: {e}")

            self._cache = {"entries": {}}

    def _save(self) -> None:
        """Save memory to file. Callers must hold the lock."""
        if self._cache is None:
            return

        try:
            SHORT_TERM_MEMORY_PATH.parent.mkdir(parents=True, exist_ok=True)
            with open(SHORT_TERM_MEMORY_PATH, "w") as f:
                json.dump(self._cache, f, indent=2)
            logger.debug(f"Saved short-term memory to {SHORT_TERM_MEMORY_PATH}")
        except Exception as e:
            # Persistence is best-effort; the in-memory cache stays valid.
            logger.error(f"Failed to save short-term memory: {e}")

    def reload(self) -> None:
        """Force reload memory from disk."""
        with self._lock:
            self._cache = None
            self._load()
        logger.info("Reloaded short-term memory from disk")
import settings as settings_module +from .memory import ShortTermMemory logger = logging.getLogger(__name__) @@ -1449,6 +1450,173 @@ async def get_n8n_workflow(workflow_id: str) -> N8nWorkflowDetailResponse: ) +# ============================================================================= +# Short-Term Memory Endpoints +# ============================================================================= + + +class MemoryEntryResponse(BaseModel): + """Response body for a single memory entry.""" + + key: str + value: dict | list | str | int | float | bool | None + stored_at: float + expires_at: float | None + source: str + + +class MemoryListResponse(BaseModel): + """Response body for GET /memory endpoint.""" + + entries: list[MemoryEntryResponse] + + +class MemoryStoreRequest(BaseModel): + """Request body for POST /memory endpoint.""" + + key: str + value: dict | list | str | int | float | bool | None + ttl_seconds: int | None = None + + +class MemoryStoreResponse(BaseModel): + """Response body for POST /memory endpoint.""" + + status: str + key: str + + +class MemoryDeleteResponse(BaseModel): + """Response body for DELETE /memory endpoint.""" + + status: str + key: str + + +class MemoryClearResponse(BaseModel): + """Response body for DELETE /memory (clear all) endpoint.""" + + status: str + cleared_count: int + + +@app.get("/memory", response_model=MemoryListResponse) +async def get_memory() -> MemoryListResponse: + """Get all short-term memory entries. + + Returns: + MemoryListResponse with all non-expired entries + """ + memory = ShortTermMemory() + entries = memory.get_all() + + return MemoryListResponse( + entries=[ + MemoryEntryResponse( + key=e["key"], + value=e["value"], + stored_at=e["stored_at"], + expires_at=e["expires_at"], + source=e["source"], + ) + for e in entries + ] + ) + + +@app.get("/memory/{key}", response_model=MemoryEntryResponse) +async def get_memory_entry(key: str) -> MemoryEntryResponse: + """Get a single memory entry by key. 
+ + Args: + key: The memory key to retrieve + + Returns: + MemoryEntryResponse with the entry data + + Raises: + HTTPException: 404 if key not found or expired + """ + memory = ShortTermMemory() + entries = memory.get_all() + + # Find the entry with matching key + for entry in entries: + if entry["key"] == key: + return MemoryEntryResponse( + key=entry["key"], + value=entry["value"], + stored_at=entry["stored_at"], + expires_at=entry["expires_at"], + source=entry["source"], + ) + + raise HTTPException(status_code=404, detail=f"Key not found: {key}") + + +@app.post("/memory", response_model=MemoryStoreResponse) +async def store_memory(req: MemoryStoreRequest) -> MemoryStoreResponse: + """Store a value in short-term memory. + + Args: + req: MemoryStoreRequest with key, value, and optional TTL + + Returns: + MemoryStoreResponse with status + """ + memory = ShortTermMemory() + memory.store( + key=req.key, + value=req.value, + ttl_seconds=req.ttl_seconds, + source="api", + ) + + logger.info(f"Memory stored via API: {req.key}") + + return MemoryStoreResponse(status="stored", key=req.key) + + +@app.delete("/memory/{key}", response_model=MemoryDeleteResponse) +async def delete_memory_entry(key: str) -> MemoryDeleteResponse: + """Delete a single memory entry. + + Args: + key: The memory key to delete + + Returns: + MemoryDeleteResponse with status + + Raises: + HTTPException: 404 if key not found + """ + memory = ShortTermMemory() + deleted = memory.delete(key) + + if not deleted: + raise HTTPException(status_code=404, detail=f"Key not found: {key}") + + return MemoryDeleteResponse(status="deleted", key=key) + + +@app.delete("/memory", response_model=MemoryClearResponse) +async def clear_memory() -> MemoryClearResponse: + """Clear all short-term memory entries. 
+ + Returns: + MemoryClearResponse with status and count of cleared entries + """ + memory = ShortTermMemory() + entries = memory.list_keys() + count = len(entries) + + memory.clear() + + logger.info(f"Memory cleared via API: {count} entries removed") + + return MemoryClearResponse(status="cleared", cleared_count=count) + + # ============================================================================= # Prewarm Endpoint # ============================================================================= diff --git a/voice_agent.py b/voice_agent.py index b22c4a0..f7f125f 100644 --- a/voice_agent.py +++ b/voice_agent.py @@ -49,12 +49,14 @@ from caal import CAALLLM # noqa: E402 from caal.integrations import ( # noqa: E402 + MemoryTools, WebSearchTools, discover_n8n_workflows, initialize_mcp_servers, load_mcp_config, ) from caal.llm import ToolDataCache, llm_node # noqa: E402 +from caal.memory import ShortTermMemory # noqa: E402 from caal.stt import WakeWordGatedSTT # noqa: E402 # Configure logging - LiveKit adds LogQueueHandler to root in worker processes, @@ -336,8 +338,8 @@ async def hass_get_state(target: str = None) -> str: return tool_definitions, tool_callables -class VoiceAssistant(WebSearchTools, Agent): - """Voice assistant with MCP tools and web search.""" +class VoiceAssistant(MemoryTools, WebSearchTools, Agent): + """Voice assistant with MCP tools, web search, and short-term memory.""" def __init__( self, @@ -352,6 +354,7 @@ def __init__( max_turns: int = 20, hass_tool_definitions: list[dict] | None = None, hass_tool_callables: dict | None = None, + short_term_memory: ShortTermMemory | None = None, ) -> None: super().__init__( instructions=load_prompt(language=language), @@ -381,6 +384,9 @@ def __init__( self._tool_data_cache = ToolDataCache(max_entries=tool_cache_size) self._max_turns = max_turns + # Short-term memory for persistent context (MemoryTools mixin requirement) + self._short_term_memory = short_term_memory + async def llm_node(self, chat_ctx, 
tools, model_settings): """Custom LLM node using provider-agnostic interface.""" async for chunk in llm_node( @@ -388,6 +394,7 @@ async def llm_node(self, chat_ctx, tools, model_settings): chat_ctx, provider=self._provider, tool_data_cache=self._tool_data_cache, + short_term_memory=self._short_term_memory, max_turns=self._max_turns, ): yield chunk @@ -702,6 +709,14 @@ async def _publish_tool_status( hass_tool_definitions, hass_tool_callables = create_hass_tools(hass_server) logger.info("Home Assistant tools enabled: hass_control, hass_get_state") + # Initialize short-term memory (singleton, persists across restarts) + short_term_memory = ShortTermMemory() + memory_count = len(short_term_memory.list_keys()) + if memory_count > 0: + logger.info(f"Short-term memory loaded: {memory_count} entries") + else: + logger.info("Short-term memory initialized (empty)") + # Create agent with CAALLLM and all MCP servers assistant = VoiceAssistant( caal_llm=caal_llm, @@ -715,6 +730,7 @@ async def _publish_tool_status( max_turns=runtime["max_turns"], hass_tool_definitions=hass_tool_definitions, hass_tool_callables=hass_tool_callables, + short_term_memory=short_term_memory, ) # Create event to wait for session close (BEFORE session.start to avoid race condition) From aaae3be91ae1397f58c26a8c48dae84e99847aba Mon Sep 17 00:00:00 2001 From: Corey McCallum Date: Tue, 3 Feb 2026 14:29:55 -0800 Subject: [PATCH 02/10] feat(memory): extend TTL to 7 days, support tool-specified TTL - Change default TTL from 24h to 7 days (604800s) - Allow tools to specify custom TTL in memory_hint: - Simple value: uses default 7d TTL - {"value": ..., "ttl": seconds}: custom TTL - {"value": ..., "ttl": null}: no expiry Co-Authored-By: Claude Opus 4.5 --- src/caal/llm/llm_node.py | 22 ++++++++++++++++++++-- src/caal/memory/base.py | 4 ++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/caal/llm/llm_node.py b/src/caal/llm/llm_node.py index 931639a..e2849e8 100644 --- 
a/src/caal/llm/llm_node.py +++ b/src/caal/llm/llm_node.py @@ -485,13 +485,31 @@ async def _execute_tool_calls( tool_result = await _execute_single_tool(agent, tool_name, arguments) # Extract and store memory_hint from tool response (deterministic) + # Supports two formats: + # {"memory_hint": {"key": "simple_value"}} → 7d default TTL + # {"memory_hint": {"key": {"value": "...", "ttl": 3600}}} → custom TTL + # {"memory_hint": {"key": {"value": "...", "ttl": null}}} → no expiry if short_term_memory and isinstance(tool_result, dict): memory_hint = tool_result.get("memory_hint") if memory_hint and isinstance(memory_hint, dict): - for key, value in memory_hint.items(): + from caal.memory.base import DEFAULT_TTL_SECONDS + + for key, hint_value in memory_hint.items(): + # Check if hint_value is extended format with ttl + if isinstance(hint_value, dict) and "value" in hint_value: + actual_value = hint_value["value"] + # "ttl" in dict distinguishes missing (use default) vs null (no expiry) + if "ttl" in hint_value: + ttl = hint_value["ttl"] # Could be int or None + else: + ttl = DEFAULT_TTL_SECONDS + else: + actual_value = hint_value + ttl = DEFAULT_TTL_SECONDS short_term_memory.store( key=key, - value=value, + value=actual_value, + ttl_seconds=ttl, source="tool_hint", ) logger.info(f"Stored memory hint from {tool_name}: {key}") diff --git a/src/caal/memory/base.py b/src/caal/memory/base.py index d205419..e4bc91e 100644 --- a/src/caal/memory/base.py +++ b/src/caal/memory/base.py @@ -18,8 +18,8 @@ _SCRIPT_DIR = Path(__file__).parent.parent.parent.parent # src/caal/memory -> project root MEMORY_DIR = Path(os.getenv("CAAL_MEMORY_DIR", _SCRIPT_DIR)) -# Default TTL for auto-stored data (24 hours) -DEFAULT_TTL_SECONDS = 86400 +# Default TTL for auto-stored data (7 days) +DEFAULT_TTL_SECONDS = 604800 # Source types for tracking where memory entries came from MemorySource = Literal["tool_hint", "explicit", "api"] From 59c251209501b4e5e9ff572f7f63850684c42992 Mon Sep 17 00:00:00 
2001 From: Corey McCallum Date: Tue, 3 Feb 2026 21:10:47 -0800 Subject: [PATCH 03/10] fix(agent): proper tool call chaining in llm_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace linear execute→stream→retry with a loop that supports multi-step tool chaining. Model can now: call tool A → get result → call tool B → get result → generate text response. Previously, after one tool execution the code tried to stream a text response. If the model wanted to chain (call another tool), it produced 0 text chunks, triggering a retry without tools that crashed Ollama (tool references in messages but no tools registered). New flow: - Loop non-streaming chat() calls (max 5 rounds) - Each round: if tool_calls → execute → loop back - When no tool_calls → yield content or stream final response - Safety fallback: _strip_tool_messages converts tool messages to plain text if Ollama still crashes on the streaming path Co-Authored-By: Claude Opus 4.5 --- src/caal/llm/llm_node.py | 193 ++++++++++++++++++++++++++------------- 1 file changed, 127 insertions(+), 66 deletions(-) diff --git a/src/caal/llm/llm_node.py b/src/caal/llm/llm_node.py index e2849e8..0088b9a 100644 --- a/src/caal/llm/llm_node.py +++ b/src/caal/llm/llm_node.py @@ -116,47 +116,81 @@ async def llm_node(self, chat_ctx, tools, model_settings): # Discover tools from agent and MCP servers tools = await _discover_tools(agent) - # If tools available, check for tool calls first (non-streaming) + # If tools available, loop non-streaming calls to support chaining + # Model can call tool A → get result → call tool B → get result → text + max_tool_rounds = 5 + tool_round = 0 + if tools: - try: - response = await provider.chat(messages=messages, tools=tools) - except Exception as tool_err: - # Tool call generation failed (e.g., model garbled tool name) - # Retry once, then fall back to streaming without tools - err_msg = str(tool_err) - is_malformed_tool = ( - "tool_use_failed" 
in err_msg - or "Failed to call a function" in err_msg - # Ollama: model leaked think/control tokens into tool name - # e.g. "tool '{}[/THINK][TOOL_CALLS]notion_tasks' not found" - or "[/THINK]" in err_msg - or "[TOOL_CALLS]" in err_msg - or "= max_tool_rounds: + logger.warning( + f"Hit max tool rounds ({max_tool_rounds}), streaming response" + ) - # No tools or no tool calls - stream directly - # Publish no-tool status immediately + # Stream final response (after tool chain or no tools) + # If we executed tools, messages contains full tool history if hasattr(agent, "_on_tool_status") and agent._on_tool_status: import asyncio asyncio.create_task(agent._on_tool_status(False, [], [])) - async for chunk in provider.chat_stream(messages=messages): - yield strip_markdown_for_tts(chunk) + if tool_round > 0: + # After tool execution — pass tools so Ollama can validate + # tool_calls in message history + logger.info("Streaming response after tool execution...") + try: + async for chunk in provider.chat_stream( + messages=messages, tools=tools + ): + yield strip_markdown_for_tts(chunk) + except Exception as stream_err: + # Safety fallback: strip tool messages and retry without tools + logger.warning( + f"Post-tool streaming failed: {stream_err}. " + "Retrying with stripped tool messages..." + ) + clean_messages = _strip_tool_messages(messages) + async for chunk in provider.chat_stream(messages=clean_messages): + yield strip_markdown_for_tts(chunk) + else: + # No tools or no tool calls — plain streaming + async for chunk in provider.chat_stream(messages=messages): + yield strip_markdown_for_tts(chunk) except Exception as e: logger.error(f"Error in llm_node: {e}", exc_info=True) yield f"I encountered an error: {e}" +def _strip_tool_messages(messages: list[dict]) -> list[dict]: + """Convert tool call/result messages to plain text. + + Ollama crashes if messages contain tool references but no tools are + registered. 
This converts tool messages to plain text equivalents, + preserving the context so the model knows what happened. + """ + cleaned = [] + for msg in messages: + if msg.get("role") == "tool": + # Tool result → system message with the content + cleaned.append({ + "role": "system", + "content": f"Tool result: {msg.get('content', '')}", + }) + elif msg.get("role") == "assistant" and msg.get("tool_calls"): + # Assistant tool call → plain assistant message + parts = [] + if msg.get("content"): + parts.append(msg["content"]) + for tc in msg["tool_calls"]: + func = tc.get("function", {}) + name = func.get("name", "unknown") + args = func.get("arguments", {}) + parts.append(f"[Called {name} with {args}]") + cleaned.append({ + "role": "assistant", + "content": "\n".join(parts), + }) + else: + cleaned.append(msg) + return cleaned + + def _build_messages_from_context( chat_ctx, tool_data_cache: ToolDataCache | None = None, From d8d5fcd19953bc04d95b7911629c97ffd5d20524 Mon Sep 17 00:00:00 2001 From: Corey McCallum Date: Tue, 3 Feb 2026 22:01:06 -0800 Subject: [PATCH 04/10] fix(agent): dedup tool calls, persist indicator, inject call args into context - Deduplicate identical tool calls within a single round (same name + args) - Accumulate tool names/params across chained rounds for frontend indicator - Keep tool indicator showing after response (don't clear when tools were used) - Include tool call arguments in ToolDataCache context injection Co-Authored-By: Claude Opus 4.5 --- src/caal/llm/llm_node.py | 48 ++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/src/caal/llm/llm_node.py b/src/caal/llm/llm_node.py index 0088b9a..9c5e221 100644 --- a/src/caal/llm/llm_node.py +++ b/src/caal/llm/llm_node.py @@ -52,9 +52,9 @@ def __init__(self, max_entries: int = 3): self.max_entries = max_entries self._cache: list[dict] = [] - def add(self, tool_name: str, data: Any) -> None: - """Add tool response data to cache.""" - entry = {"tool": 
tool_name, "data": data, "timestamp": time.time()} + def add(self, tool_name: str, data: Any, arguments: dict | None = None) -> None: + """Add tool call and response data to cache.""" + entry = {"tool": tool_name, "args": arguments, "data": data, "timestamp": time.time()} self._cache.append(entry) if len(self._cache) > self.max_entries: self._cache.pop(0) # Remove oldest @@ -63,9 +63,10 @@ def get_context_message(self) -> str | None: """Format cached data as context string for LLM injection.""" if not self._cache: return None - parts = ["Recent tool response data for reference:"] + parts = ["Recent tool calls and responses for reference:"] for entry in self._cache: - parts.append(f"\n{entry['tool']}: {json.dumps(entry['data'])}") + args = json.dumps(entry['args']) if entry.get('args') else '' + parts.append(f"\n{entry['tool']}({args}) → {json.dumps(entry['data'])}") return "\n".join(parts) def clear(self) -> None: @@ -120,6 +121,8 @@ async def llm_node(self, chat_ctx, tools, model_settings): # Model can call tool A → get result → call tool B → get result → text max_tool_rounds = 5 tool_round = 0 + all_tool_names: list[str] = [] + all_tool_params: list[dict] = [] if tools: while tool_round < max_tool_rounds: @@ -169,13 +172,8 @@ async def llm_node(self, chat_ctx, tools, model_settings): if not response.tool_calls: # Model is done with tools if response.content: - # Publish no-tool status - if hasattr(agent, "_on_tool_status") and agent._on_tool_status: - import asyncio - - asyncio.create_task( - agent._on_tool_status(False, [], []) - ) + # Don't clear tool status — keep indicator showing + # which tools were used for this response yield strip_markdown_for_tts(response.content) return break # No content either, fall through to streaming @@ -187,15 +185,15 @@ async def llm_node(self, chat_ctx, tools, model_settings): f"{len(response.tool_calls)} call(s)" ) - # Track tool usage for frontend indicator - tool_names = [tc.name for tc in response.tool_calls] - tool_params 
= [tc.arguments for tc in response.tool_calls] + # Accumulate tool usage across rounds for frontend indicator + all_tool_names.extend(tc.name for tc in response.tool_calls) + all_tool_params.extend(tc.arguments for tc in response.tool_calls) if hasattr(agent, "_on_tool_status") and agent._on_tool_status: import asyncio asyncio.create_task( - agent._on_tool_status(True, tool_names, tool_params) + agent._on_tool_status(True, all_tool_names, all_tool_params) ) messages = await _execute_tool_calls( @@ -215,8 +213,8 @@ async def llm_node(self, chat_ctx, tools, model_settings): ) # Stream final response (after tool chain or no tools) - # If we executed tools, messages contains full tool history - if hasattr(agent, "_on_tool_status") and agent._on_tool_status: + # Only clear tool indicator if no tools were called this turn + if tool_round == 0 and hasattr(agent, "_on_tool_status") and agent._on_tool_status: import asyncio asyncio.create_task(agent._on_tool_status(False, [], [])) @@ -536,6 +534,18 @@ async def _execute_tool_calls( ) messages.append(tool_call_message) + # Deduplicate identical tool calls (same name + same args) + seen = set() + unique_tool_calls = [] + for tc in tool_calls: + key = (tc.name, json.dumps(tc.arguments, sort_keys=True)) + if key not in seen: + seen.add(key) + unique_tool_calls.append(tc) + else: + logger.info(f"Dedup: skipping duplicate {tc.name} call with identical args") + tool_calls = unique_tool_calls + # Execute each tool for tool_call in tool_calls: tool_name = tool_call.name @@ -583,7 +593,7 @@ async def _execute_tool_calls( or tool_result.get("results") or tool_result ) - tool_data_cache.add(tool_name, data) + tool_data_cache.add(tool_name, data, arguments=arguments) logger.debug(f"Cached tool data for {tool_name}") # Format tool result - preserve JSON structure for LLM From c4cd33c306ff2b8d86a08f447c33c091d0a3d70e Mon Sep 17 00:00:00 2001 From: Corey McCallum Date: Thu, 5 Feb 2026 20:28:56 -0800 Subject: [PATCH 05/10] fix(memory): 
write to /app/data volume for Docker persistence Memory file was failing with permission denied because /app is owned by root. Now uses CAAL_MEMORY_DIR=/app/data (the caal-memory volume) and entrypoint ensures directory is writable by agent user. Co-Authored-By: Claude Opus 4.6 --- docker-compose.apple.yaml | 1 + docker-compose.cpu.yaml | 1 + docker-compose.yaml | 1 + entrypoint.sh | 5 +++++ 4 files changed, 8 insertions(+) diff --git a/docker-compose.apple.yaml b/docker-compose.apple.yaml index 895b635..4a6997e 100644 --- a/docker-compose.apple.yaml +++ b/docker-compose.apple.yaml @@ -88,6 +88,7 @@ services: - ./.env environment: - LIVEKIT_URL=ws://livekit:7880 + - CAAL_MEMORY_DIR=/app/data # Point to mlx-audio (single service for STT + TTS) - SPEACHES_URL=${MLX_AUDIO_URL:-http://host.docker.internal:8001} - KOKORO_URL=${MLX_AUDIO_URL:-http://host.docker.internal:8001} diff --git a/docker-compose.cpu.yaml b/docker-compose.cpu.yaml index 454fcce..ef2600a 100644 --- a/docker-compose.cpu.yaml +++ b/docker-compose.cpu.yaml @@ -80,6 +80,7 @@ services: environment: - LIVEKIT_URL=ws://livekit:7880 - SPEACHES_URL=http://speaches:8000 + - CAAL_MEMORY_DIR=/app/data volumes: - caal-memory:/app/data - caal-config:/app/config # Runtime config (settings.json, mcp_servers.json) diff --git a/docker-compose.yaml b/docker-compose.yaml index 190fb28..443c459 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -76,6 +76,7 @@ services: - LIVEKIT_URL=ws://livekit:7880 - SPEACHES_URL=http://speaches:8000 - KOKORO_URL=http://kokoro:8880 + - CAAL_MEMORY_DIR=/app/data volumes: - caal-memory:/app/data - caal-config:/app/config # Runtime config (settings.json, mcp_servers.json) diff --git a/entrypoint.sh b/entrypoint.sh index 1b58e9c..5491d6a 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -5,11 +5,16 @@ set -e CONFIG_DIR="/app/config" +DATA_DIR="/app/data" # Ensure config directory exists and is writable by agent mkdir -p "$CONFIG_DIR" chown agent:agent "$CONFIG_DIR" +# Ensure 
data directory exists and is writable by agent (memory persistence) +mkdir -p "$DATA_DIR" +chown agent:agent "$DATA_DIR" + # settings.json - copy default if missing if [ ! -f "$CONFIG_DIR/settings.json" ]; then echo "Creating settings.json from defaults..." From 323fb3291d501a792ea1c4e560bd0b482bdfabc1 Mon Sep 17 00:00:00 2001 From: Corey McCallum Date: Thu, 5 Feb 2026 20:36:00 -0800 Subject: [PATCH 06/10] fix(memory): mark context as internal so LLM won't read it aloud Co-Authored-By: Claude Opus 4.6 --- src/caal/memory/short_term.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/caal/memory/short_term.py b/src/caal/memory/short_term.py index 713f163..28c58aa 100644 --- a/src/caal/memory/short_term.py +++ b/src/caal/memory/short_term.py @@ -191,7 +191,8 @@ def get_context_message(self) -> str | None: now = time.time() lines = [ - "Short-term memory (check before asking user for previously-mentioned info):" + "[INTERNAL CONTEXT - DO NOT read aloud or announce. " + "Use silently when relevant to the conversation.]" ] for key, entry in self._cache["entries"].items(): From fdde93dff37a80ada351fed0415343a2275c4aa6 Mon Sep 17 00:00:00 2001 From: Corey McCallum Date: Thu, 5 Feb 2026 20:37:17 -0800 Subject: [PATCH 07/10] fix(memory): only inject context after user has spoken Prevents the LLM from using memory data in the initial greeting. Memory context is now skipped when there are no user messages yet. Co-Authored-By: Claude Opus 4.6 --- src/caal/llm/llm_node.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/caal/llm/llm_node.py b/src/caal/llm/llm_node.py index 9c5e221..486fc5e 100644 --- a/src/caal/llm/llm_node.py +++ b/src/caal/llm/llm_node.py @@ -361,8 +361,9 @@ def _build_messages_from_context( if context: messages.append({"role": "system", "content": context}) - # 3. Inject short-term memory context - if short_term_memory: + # 3. 
Inject short-term memory context (only after user has spoken) + has_user_message = any(m["role"] == "user" for m in chat_messages) + if short_term_memory and has_user_message: memory_context = short_term_memory.get_context_message() if memory_context: messages.append({"role": "system", "content": memory_context}) From 351b374bf78ce38e01ee0d55873e7d468d7963d4 Mon Sep 17 00:00:00 2001 From: Corey McCallum Date: Thu, 5 Feb 2026 20:50:33 -0800 Subject: [PATCH 08/10] refactor(memory): keep context injection as awareness hint for tool chaining MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Context injection helps the LLM know what's in memory so it can chain tools correctly (e.g. memory_short → flight_tracker). Without it, the model may skip memory and go to other tools directly. Co-Authored-By: Claude Opus 4.6 --- src/caal/llm/llm_node.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/caal/llm/llm_node.py b/src/caal/llm/llm_node.py index 486fc5e..8300ee4 100644 --- a/src/caal/llm/llm_node.py +++ b/src/caal/llm/llm_node.py @@ -292,13 +292,13 @@ def _build_messages_from_context( Message order: 1. System prompt (always first, never trimmed) 2. Tool data context (injected from cache) - 3. Short-term memory context (persistent facts) + 3. Short-term memory context (awareness hint for tool chaining) 4. 
Chat history (sliding window applied) Args: chat_ctx: LiveKit chat context tool_data_cache: Cache of recent tool response data - short_term_memory: Short-term memory for persistent context + short_term_memory: Short-term memory for context awareness max_turns: Max conversation turns to keep (1 turn = user + assistant) """ system_prompt = None From 0e4603cd6274388408d8984d046e554ae836c1f7 Mon Sep 17 00:00:00 2001 From: Corey McCallum Date: Thu, 5 Feb 2026 21:23:46 -0800 Subject: [PATCH 09/10] feat(memory): add inline edit in memory panel, fix registry_cache permissions - Memory detail modal now has pencil icon to edit values in-place - Add registry_cache.json symlink to entrypoint.sh (same pattern as settings.json) to fix permission denied on /app/registry_cache.json Co-Authored-By: Claude Opus 4.6 --- entrypoint.sh | 7 ++ frontend/components/memory/memory-panel.tsx | 84 ++++++++++++++++++--- 2 files changed, 81 insertions(+), 10 deletions(-) diff --git a/entrypoint.sh b/entrypoint.sh index 5491d6a..17e68f9 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -29,9 +29,16 @@ if [ ! -f "$CONFIG_DIR/mcp_servers.json" ]; then chown agent:agent "$CONFIG_DIR/mcp_servers.json" fi +# registry_cache.json - create empty if missing +if [ ! 
-f "$CONFIG_DIR/registry_cache.json" ]; then + echo '{}' > "$CONFIG_DIR/registry_cache.json" + chown agent:agent "$CONFIG_DIR/registry_cache.json" +fi + # Create symlinks from /app to config files (for code that expects them in /app) ln -sf "$CONFIG_DIR/settings.json" /app/settings.json ln -sf "$CONFIG_DIR/mcp_servers.json" /app/mcp_servers.json +ln -sf "$CONFIG_DIR/registry_cache.json" /app/registry_cache.json # Drop privileges and execute the main command as agent user exec gosu agent "$@" diff --git a/frontend/components/memory/memory-panel.tsx b/frontend/components/memory/memory-panel.tsx index 663d15a..bff3673 100644 --- a/frontend/components/memory/memory-panel.tsx +++ b/frontend/components/memory/memory-panel.tsx @@ -3,7 +3,7 @@ import { useCallback, useEffect, useState } from 'react'; import { createPortal } from 'react-dom'; import { useTranslations } from 'next-intl'; -import { Brain, Clock, Trash, X } from '@phosphor-icons/react/dist/ssr'; +import { Brain, Clock, FloppyDisk, PencilSimple, Trash, X } from '@phosphor-icons/react/dist/ssr'; import { Button } from '@/components/livekit/button'; interface MemoryEntry { @@ -82,6 +82,8 @@ export function MemoryPanel({ isOpen, onClose }: MemoryPanelProps) { const [entries, setEntries] = useState([]); const [loading, setLoading] = useState(true); const [selectedEntry, setSelectedEntry] = useState(null); + const [editing, setEditing] = useState(false); + const [editValue, setEditValue] = useState(''); const fetchMemory = useCallback(async () => { setLoading(true); @@ -131,6 +133,27 @@ export function MemoryPanel({ isOpen, onClose }: MemoryPanelProps) { } }; + const handleSaveEdit = async () => { + if (!selectedEntry) return; + + try { + const res = await fetch('/api/memory', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ key: selectedEntry.key, value: editValue }), + }); + if (res.ok) { + setEntries( + entries.map((e) => (e.key === selectedEntry.key ? 
{ ...e, value: editValue } : e)) + ); + setSelectedEntry({ ...selectedEntry, value: editValue }); + setEditing(false); + } + } catch (err) { + console.error('Failed to save memory entry:', err); + } + }; + if (!isOpen) return null; return createPortal( @@ -187,7 +210,10 @@ export function MemoryPanel({ isOpen, onClose }: MemoryPanelProps) { key={entry.key} className="hover:bg-muted/50 flex cursor-pointer items-center justify-between rounded-lg border p-4 transition-colors" style={{ borderColor: 'var(--border-subtle)' }} - onClick={() => setSelectedEntry(entry)} + onClick={() => { + setSelectedEntry(entry); + setEditing(false); + }} >
@@ -261,14 +287,52 @@ export function MemoryPanel({ isOpen, onClose }: MemoryPanelProps) {
- -
-                {typeof selectedEntry.value === 'string'
-                  ? selectedEntry.value
-                  : JSON.stringify(selectedEntry.value, null, 2)}
-              
+
+ + {!editing && ( + + )} +
+ {editing ? ( +
+