From b231f471130128d091d1c3f38daefaa7512005fb Mon Sep 17 00:00:00 2001 From: Jiwon Kim Date: Thu, 4 Dec 2025 22:07:51 -0800 Subject: [PATCH 1/4] quickstart --- docs/quickstart.md | 260 +++++++++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + 2 files changed, 261 insertions(+) create mode 100644 docs/quickstart.md diff --git a/docs/quickstart.md b/docs/quickstart.md new file mode 100644 index 0000000..0695e35 --- /dev/null +++ b/docs/quickstart.md @@ -0,0 +1,260 @@ +# Quick start + +To get a basic ChatKit app running—a React chat UI talking to a Python server—clone and run the starter app: + + +```sh +git clone https://github.com/openai/openai-chatkit-starter-app.git +cd openai-chatkit-starter-app/chatkit +npm run dev +``` + +The sections below explain the core components and steps behind the starter app. + +## Render chat UI + +!!! note "" + This section shows the React integration using `@openai/chatkit-react`. + If you’re not using React, you can render ChatKit directly with vanilla JavaScript using `@openai/chatkit`. + +Install the React bindings: + +```sh +npm install @openai/chatkit-react +``` + +In your index.html, load ChatKit.js: + +```html + + + + + + + +
+ + +``` + +Wire up a minimal React app. Point `api.url` at your ChatKit server endpoint and pass the domain key you configured there. + +```tsx +import {ChatKit, useChatKit} from "@openai/chatkit-react"; + +export function App() { + const chatkit = useChatKit({ + api: { + url: "http://localhost:8000/chatkit", + domainKey: "local-dev", // domain keys are optional in dev + }, + }); + + return ; +} +``` + +The chat UI will render, but sending messages will fail until you start the server below and provide a store for threads and messages. + +## Run your ChatKit server + +Install the ChatKit Python package and expose a single `/chatkit` endpoint that forwards requests to a `ChatKitServer` instance. + +```sh +pip install openai-chatkit fastapi uvicorn +``` + +Create `main.py` with a minimal server that is hard-coded to always reply “Hello, world!” - you'll replace this with an actual call to a model in [Respond] + +```python +# Other imports omitted for brevity; see the starter repo for a runnable file with all imports. +from chatkit.server import ChatKitServer + +app = FastAPI() + + +class MyChatKitServer(ChatKitServer[dict]): + async def respond( + self, + thread: ThreadMetadata, + input_user_message: UserMessageItem | None, + context: dict, + ) -> AsyncIterator[ThreadStreamEvent]: + # Streams a fixed "Hello, world!" assistant message + yield ThreadItemDoneEvent( + item=AssistantMessageItem( + thread_id=thread.id, + id=self.store.generate_item_id("message", thread, context), + created_at=datetime.now(), + content=[AssistantMessageContent(text="Hello, world!")], + ), + ) + +# Create your server by passing a store implementation. +# MyChatKitStore is defined in the next section. 
+server = MyChatKitServer(store=MyChatKitStore()) + + +@app.post("/chatkit") +async def chatkit(request: Request): + result = await server.process(await request.body(), context={}) + if isinstance(result, StreamingResult): + return StreamingResponse(result, media_type="text/event-stream") + return Response(content=result.json, media_type="application/json") +``` + +All ChatKit requests go to this single endpoint. Set `api.url` on the React side to match (`/chatkit` here), and `ChatKitServer` routes each request internally. + + +## Store chat data + +ChatKit servers require a store to load and save threads, messages, and other items. + +For this quickstart, use a small in-memory store so conversations persist while the process is running, without introducing a database. This keeps the example minimal while still matching real ChatKit behavior. + + +```python +from collections import defaultdict +from chatkit.store import NotFoundError, Store +from chatkit.types import Attachment, Page, ThreadItem, ThreadMetadata + + +class MyChatKitStore(Store[dict]): + def __init__(self): + self.threads: dict[str, ThreadMetadata] = {} + self.items: dict[str, list[ThreadItem]] = defaultdict(list) + + async def load_thread(self, thread_id: str, context: dict) -> ThreadMetadata: + if thread_id not in self.threads: + raise NotFoundError(f"Thread {thread_id} not found") + return self.threads[thread_id] + + async def save_thread(self, thread: ThreadMetadata, context: dict) -> None: + self.threads[thread.id] = thread + + async def load_threads( + self, limit: int, after: str | None, order: str, context: dict + ) -> Page[ThreadMetadata]: + threads = list(self.threads.values()) + return self._paginate( + threads, after, limit, order, sort_key=lambda t: t.created_at, cursor_key=lambda t: t.id + ) + + async def load_thread_items( + self, thread_id: str, after: str | None, limit: int, order: str, context: dict + ) -> Page[ThreadItem]: + items = self.items.get(thread_id, []) + return 
self._paginate( + items, after, limit, order, sort_key=lambda i: i.created_at, cursor_key=lambda i: i.id + ) + + async def add_thread_item( + self, thread_id: str, item: ThreadItem, context: dict + ) -> None: + self.items[thread_id].append(item) + + async def save_item( + self, thread_id: str, item: ThreadItem, context: dict + ) -> None: + items = self.items[thread_id] + for idx, existing in enumerate(items): + if existing.id == item.id: + items[idx] = item + return + items.append(item) + + async def load_item( + self, thread_id: str, item_id: str, context: dict + ) -> ThreadItem: + for item in self.items.get(thread_id, []): + if item.id == item_id: + return item + raise NotFoundError(f"Item {item_id} not found in thread {thread_id}") + + async def delete_thread(self, thread_id: str, context: dict) -> None: + self.threads.pop(thread_id, None) + self.items.pop(thread_id, None) + + async def delete_thread_item( + self, thread_id: str, item_id: str, context: dict + ) -> None: + self.items[thread_id] = [ + item for item in self.items.get(thread_id, []) if item.id != item_id + ] + + def _paginate(self, rows: list, after: str | None, limit: int, order: str, sort_key, cursor_key): + sorted_rows = sorted(rows, key=sort_key, reverse=order == "desc") + start = 0 + if after: + for idx, row in enumerate(sorted_rows): + if cursor_key(row) == after: + start = idx + 1 + break + data = sorted_rows[start : start + limit] + has_more = start + limit < len(sorted_rows) + next_after = cursor_key(data[-1]) if has_more and data else None + return Page(data=data, has_more=has_more, after=next_after) + + # Attachments are intentionally not implemented for the quickstart + + async def save_attachment( + self, attachment: Attachment, context: dict + ) -> None: + raise NotImplementedError() + + async def load_attachment( + self, attachment_id: str, context: dict + ) -> Attachment: + raise NotImplementedError() + + async def delete_attachment(self, attachment_id: str, context: dict) -> None: + 
raise NotImplementedError() + +``` + +This store implements only the methods required for basic chat while the server is running; persistence across restarts and attachments are intentionally omitted. + +For production, replace this with a database-backed store (for example, Postgres or MySQL) so threads and items persist across restarts. + + +## Generate model responses + +Replace the hardcoded "Hello, World!" reply from [Run your ChatKit server](#run-your-chatkit-server) with an Agents SDK call to generate real responses. Set `OPENAI_API_KEY` in your environment before running. + +Use ChatKit's Agents SDK helpers to simplify request conversion and streaming. The `simple_to_agent_input` helper translates ChatKit thread items to agent input items, and `stream_agent_response` turns the streamed run into ChatKit events: + + +```python +from agents import Agent, Runner +from chatkit.agents import AgentContext, simple_to_agent_input, stream_agent_response + +assistant = Agent( + name="assistant", + instructions="You are a helpful assistant.", + model="gpt-4.1-mini", +) + +class MyChatKitServer(ChatKitServer[dict]): + async def respond( + self, + thread: ThreadMetadata, + input_user_message: UserMessageItem | None, + context: dict, + ) -> AsyncIterator[ThreadStreamEvent]: + # Convert recent thread items (which includes the user message) to model input + items_page = await self.store.load_thread_items( + thread.id, + after=None, + limit=20, + order="asc", + context=context, + ) + input_items = await simple_to_agent_input(items_page.data) + + # Stream the run through ChatKit events + agent_context = AgentContext(thread=thread, store=self.store, request_context=context) + result = Runner.run_streamed(assistant, input_items, context=agent_context) + async for event in stream_agent_response(agent_context, result): + yield event +``` diff --git a/mkdocs.yml b/mkdocs.yml index 46580c5..c546eca 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -37,6 +37,7 @@ plugins: nav: - Home: 
index.md + - Quick start: quickstart.md - Concepts: - Threads: concepts/threads.md - Thread items: concepts/thread-items.md From 54a4bdec877ccf303d4fbc8633fdc6f8247b7287 Mon Sep 17 00:00:00 2001 From: Jiwon Kim Date: Fri, 5 Dec 2025 00:02:27 -0800 Subject: [PATCH 2/4] flatten and consolidate --- docs/concepts/actions.md | 2 +- docs/concepts/entities.md | 4 +- docs/concepts/thread-items.md | 54 ---- docs/concepts/thread-stream-events.md | 4 +- docs/concepts/threads.md | 127 +++++++- docs/concepts/widgets.md | 8 +- .../{add-features => }/accept-attachments.md | 0 .../{add-features => }/add-annotations.md | 0 .../add-features/disable-new-messages.md | 50 ---- .../guides/add-features/save-thread-titles.md | 49 --- .../{add-features => }/allow-mentions.md | 0 docs/guides/browse-past-threads.md | 139 +++++++++ docs/guides/compose-model-input.md | 88 +----- .../{add-features => }/create-custom-forms.md | 0 .../{add-features => }/handle-feedback.md | 0 .../handle-widget-actions.md | 0 .../make-client-tool-calls.md | 0 docs/guides/persist-chatkit-data.md | 116 +------- docs/guides/respond-to-user-message.md | 280 ++++++++++++++++++ docs/guides/run-inference.md | 70 +---- .../{add-features => }/send-client-effects.md | 0 docs/guides/serve-chatkit.md | 97 +----- .../show-progress-for-long-running-tools.md | 0 docs/guides/stream-thread-events.md | 103 +------ .../{add-features => }/stream-widgets.md | 0 mkdocs.yml | 34 +-- 26 files changed, 564 insertions(+), 661 deletions(-) delete mode 100644 docs/concepts/thread-items.md rename docs/guides/{add-features => }/accept-attachments.md (100%) rename docs/guides/{add-features => }/add-annotations.md (100%) delete mode 100644 docs/guides/add-features/disable-new-messages.md delete mode 100644 docs/guides/add-features/save-thread-titles.md rename docs/guides/{add-features => }/allow-mentions.md (100%) create mode 100644 docs/guides/browse-past-threads.md rename docs/guides/{add-features => }/create-custom-forms.md (100%) rename 
docs/guides/{add-features => }/handle-feedback.md (100%) rename docs/guides/{add-features => }/handle-widget-actions.md (100%) rename docs/guides/{add-features => }/make-client-tool-calls.md (100%) create mode 100644 docs/guides/respond-to-user-message.md rename docs/guides/{add-features => }/send-client-effects.md (100%) rename docs/guides/{add-features => }/show-progress-for-long-running-tools.md (100%) rename docs/guides/{add-features => }/stream-widgets.md (100%) diff --git a/docs/concepts/actions.md b/docs/concepts/actions.md index 11d4f36..9731e4e 100644 --- a/docs/concepts/actions.md +++ b/docs/concepts/actions.md @@ -19,4 +19,4 @@ When you set `handler: "client"`, the action flows into the client SDK’s `widg Your client integration can also initiate actions directly with `chatkit.sendCustomAction(action, itemId?)`, optionally namespaced to a specific widget item. The server receives these in `ChatKitServer.action` just like a widget-triggered action and can stream widgets, messages, or client effects in response. This pattern is useful when a flow starts outside a widget—or after a client-handled action—but you still want the server to persist results or involve the model. ## Related guides -- [Handle widget actions](../guides/add-features/handle-widget-actions.md) +- [Handle widget actions](../guides/handle-widget-actions.md) diff --git a/docs/concepts/entities.md b/docs/concepts/entities.md index bb5f111..805e031 100644 --- a/docs/concepts/entities.md +++ b/docs/concepts/entities.md @@ -17,7 +17,7 @@ Entities can be used as cited sources in assistant responses. **References:** - The [EntitySource](../../api/chatkit/types/#chatkit.types.EntitySource) Pydantic model definition -- [Add annotations in assistant messages](../guides/add-features/add-annotations.md#annotating-with-custom-entities). +- [Add annotations in assistant messages](../guides/add-annotations.md#annotating-with-custom-entities). 
## Entity tags as @-mentions in user messages @@ -27,4 +27,4 @@ Users can tag your entities in the composer using @-mentions. - The [Entity](https://openai.github.io/chatkit-js/api/openai/chatkit-react/type-aliases/entity/) TypeScript type definition - The [UserMessageTagContent](../../api/chatkit/types/#chatkit.types.UserMessageTagContent) Pydantic model definition -- [Allow @-mentions in user messages](../guides/add-features/allow-mentions.md). +- [Allow @-mentions in user messages](../guides/allow-mentions.md). diff --git a/docs/concepts/thread-items.md b/docs/concepts/thread-items.md deleted file mode 100644 index d4b3ad7..0000000 --- a/docs/concepts/thread-items.md +++ /dev/null @@ -1,54 +0,0 @@ -# Thread items - -Thread items are the individual records that make up a thread. This include user and assistant messages, widgets, workflows, and internal markers that guide processing. ChatKit orders and paginates them through your store implementation. - -They drive two core experiences: - -- **Model input**: Your server's [`respond`](../../api/chatkit/server/#chatkit.server.ChatKitServer.respond) logic will read items to build model input so the model sees the full conversation during an active turn and when resuming past threads. See [Compose model input](../guides/compose-model-input.md). -- **UI rendering**: ChatKit.js renders items incrementally for the active thread during streaming, and re-renders the persisted items when past threads are loaded. - -## User messages - -[`UserMessageItem`](../../api/chatkit/types/#chatkit.types.UserMessageItem)s represent end-user input. A user message can include the entered text, optional `quoted_text` for reply-style UI, and attachment metadata. User text is plain (no Markdown rendering) but can include @-mentions/tags; see [Allow @-mentions in user messages](../guides/add-features/allow-mentions.md). 
- -## Assistant messages - -[`AssistantMessageItem`](../../api/chatkit/types/#chatkit.types.AssistantMessageItem)s represent assistant responses. Content can include text, tool call outputs, widgets, and annotations. Text is Markdown-rendered and can carry inline annotations; see [Add annotations in assistant messages](../guides/add-features/add-annotations.md). - -### Markdown support - -Markdown in assistant messages supports: - -- GitHub-flavored Markdown: Lists, headings, code fences, inline code, blockquotes, links—all with streaming-friendly layout. -- Lists: Ordered/unordered lists stay stable while streaming (Safari-safe markers, no reflow glitches). -- Line breaks: Single newlines render as `
` when `breakNewLines` is enabled. -- Code blocks: Syntax-highlighted, copyable, and streamed smoothly; copy buttons are always present. -- Math: LaTeX via remark/rehype math plugins for inline and block equations. -- Tables: Automatic sizing with horizontal scroll for wide outputs. -- Inline annotations: Markdown directives spawn interactive annotations wired into ChatKit handlers. - -## Hidden context items - -Hidden context items serve as model input but are not rendered in the chat UI. Use them to pass non-visible signals (for example, widget actions or system context) so the model can respond to what the user did, not just what they typed. - -- [`HiddenContextItem`](../../api/chatkit/types/#chatkit.types.HiddenContextItem): Your integration’s hidden context; you control the schema and how it is converted for the model. -- [`SDKHiddenContextItem`](../../api/chatkit/types/#chatkit.types.SDKHiddenContextItem): Hidden context inserted by the ChatKit Python SDK for its own operations; you normally leave it alone unless you override conversion behavior. - - -## ThreadItemConverter - -[`ThreadItemConverter`](../../api/chatkit/agents/#chatkit.agents.ThreadItemConverter) maps stored thread items into model-ready input items. Defaults cover messages, widgets, workflows, and tasks; override it to handle attachments, tags, or hidden context in the format your model expects. Combine converter tweaks with prompting so the model sees a coherent view of rich items (for example, summarizing widgets or tasks into text the model can consume). - -## Thread item actions - -Thread item actions are quick action buttons attached to an assistant turn that let users act on the output, such as retrying, copying, or submitting feedback. - -They can be configured client-side with the [threadItemActions option](https://openai.github.io/chatkit-js/api/openai/chatkit-react/type-aliases/threaditemactionsoption/). 
- - -## Related guides -- [Persist ChatKit threads and messages](../guides/persist-chatkit-data.md) -- [Compose model inputs](../guides/compose-model-input.md) -- [Add annotations in assistant messages](../guides/add-features/add-annotations.md) -- [Allow @-mentions in user messages](../guides/add-features/allow-mentions.md) -- [Handle feedback](../guides/add-features/handle-feedback.md) \ No newline at end of file diff --git a/docs/concepts/thread-stream-events.md b/docs/concepts/thread-stream-events.md index 7940e0d..1f38327 100644 --- a/docs/concepts/thread-stream-events.md +++ b/docs/concepts/thread-stream-events.md @@ -29,13 +29,13 @@ Stream [`ErrorEvent`](../../api/chatkit/types/#chatkit.types.ErrorEvent)s for us Stream [`ProgressUpdateEvent`](../../api/chatkit/types/#chatkit.types.ProgressUpdateEvent)s to show the user transient status while work is in flight. -See [Show progress for long-running tools](../guides/add-features/show-progress-for-long-running-tools.md) for more info. +See [Show progress for long-running tools](../guides/show-progress-for-long-running-tools.md) for more info. ## Client effects Use [`ClientEffectEvent`](../../api/chatkit/types/#chatkit.types.ClientEffectEvent) to trigger fire-and-forget behavior on the client such as opening a dialog or pushing updates. -See [Send client effects](../guides/add-features/send-client-effects.md) for more info. +See [Send client effects](../guides/send-client-effects.md) for more info. ## Stream options diff --git a/docs/concepts/threads.md b/docs/concepts/threads.md index 2de12ee..94570c3 100644 --- a/docs/concepts/threads.md +++ b/docs/concepts/threads.md @@ -1,16 +1,123 @@ -# Threads +# Threads and items -Threads are the core unit of ChatKit: a single conversation timeline that groups messages, tool calls, widgets, and related metadata. +In ChatKit, a **thread** represents a single conversation. 
It is the unit that ties together everything that happens in that conversation: messages, widgets, actions, system signals, and metadata. A thread is stored as an ordered history of **thread items**, which ChatKit loads, paginates, and renders as needed. -## Lifecycle -- When a user submits a message and no thread exists, `ChatKitServer` creates one by calling your store's [`save_thread`](../../api/chatkit/store/#chatkit.store.Store.save_thread). -- As responses stream back, `ChatKitServer` automatically persists thread items as they are completed—see [Thread items](thread-items.md) and [Stream responses back to your user](../guides/stream-thread-events.md) for how events drive storage. -- Update titles or metadata intentionally in your integration (e.g., after summarizing a topic) by calling [`store.save_thread`](../../api/chatkit/store/#chatkit.store.Store.save_thread) with the new values. -- When history is enabled client-side, ChatKit retrieves past threads. The user can continue any previous thread by default. -- Archive or close threads according to your policies: mark them read-only (e.g., [disable new messages](../guides/add-features/disable-new-messages.md)) or delete them if you no longer want them discoverable. +## What is a thread? +A thread is an ordered timeline that contains: + +- Conversation history (user and assistant messages) +- Structured content such as widgets and workflows +- Internal signals that guide processing or model behavior +- Metadata like titles or status flags + +Threads are persisted by your store implementation and can be updated, continued, or made read-only according to your application’s needs. + +## What are thread items? + +Thread items are the individual records that make up a thread. 
Each item represents one meaningful unit in the conversation history, such as: + +- A user message +- An assistant response +- A widget rendered by the assistant +- A non-visible signal used only for model input + +ChatKit maintains the order of items, streams new ones as they are produced, and paginates them when history is loaded. + +## How threads are created and updated + +A typical thread lifecycle looks like this: + +- **Thread creation**: When a user submits a message and no thread exists yet, ChatKitServer creates one and persists it by calling your store’s `save_thread`. +- **Appending items**: As the server streams a response, ChatKit persists thread items automatically as each item completes. Streaming events directly drive what gets stored. +- **Updating metadata**: During respond, you can freely mutate the thread object (for example, to set or refine the title). ChatKit automatically persists these updates when the response completes. You can also call store.save_thread explicitly if needed. +- **Loading history**: When history is enabled client-side, ChatKit retrieves past threads and their items. Users can continue an existing thread by default. +- **Closing or archiving**: Threads can be marked read-only (for example, by disabling new messages) or deleted entirely if they should no longer be discoverable. + + +## How thread items are used + +Thread items serve two primary purposes in ChatKit: + +### Model input + +Your server's [`respond`](../../api/chatkit/server/#chatkit.server.ChatKitServer.respond) logic reads thread items to construct input for the model input. This ensures the model sees the full conversational context both during an active response and when a user resumes a past thread. + +See [Compose model input](../guides/compose-model-input.md). + +### UI rendering + +On the client, ChatKit.js renders items incrementally as they stream in for the active thread. 
When a past thread is loaded, the same persisted items are re-rendered to reconstruct the conversation UI. + +## Core item types + +### User messages + +[`UserMessageItem`](../../api/chatkit/types/#chatkit.types.UserMessageItem)s represent end-user input. They may include: + +- Plain text entered by the user +- Optional `quoted_text` for reply-style UIs +- Attachment metadata + + +User text is not Markdown-rendered, but it may contain [@-mentions](../guides/allow-mentions.md) if your integration enables them. + + +### Assistant messages + +[`AssistantMessageItem`](../../api/chatkit/types/#chatkit.types.AssistantMessageItem)s represent assistant output. Their content can include: + +- Markdown-rendered text +- Tool call outputs +- Widgets and structured UI elements +- [Inline annotations](../guides/add-annotations.md) + +Assistant text supports rich Markdown and is rendered progressively as it streams. + +#### Markdown support + +Assistant messages support: + +- GitHub-flavored Markdown (headings, lists, code blocks, links, blockquotes) +- Stable list rendering during streaming (Safari-safe, no reflow) +- Optional single-newline line breaks +- Syntax-highlighted, copyable code blocks +- LaTeX math (inline and block) +- Tables with automatic sizing and horizontal scrolling +- Inline annotations that create interactive affordances in the UI + +### Hidden context items + +Hidden context items are included in model input but are not rendered in the chat UI. They allow the model to react to what happened in the interface, not just what the user typed. + +Typical use cases include recording widget actions, selection state, or system signals. + +- **[`HiddenContextItem`](../../api/chatkit/types/#chatkit.types.HiddenContextItem)**: Integration-defined hidden context. You control its schema and how it is converted for the model. 
+ +- **[`SDKHiddenContextItem`](../../api/chatkit/types/#chatkit.types.SDKHiddenContextItem)**: Hidden context inserted by the ChatKit Python SDK for its own internal operations. Most applications do not need to modify this unless overriding conversion behavior. + +## Thread item actions + +Thread item actions are quick action buttons associated with an assistant turn. They let users act on the output—such as retrying a response, copying content, or submitting feedback. + +Actions are configured client-side using the [threadItemActions option](https://openai.github.io/chatkit-js/api/openai/chatkit-react/type-aliases/threaditemactionsoption/). + +## Converting items to model input + +[`ThreadItemConverter`](../../api/chatkit/agents/#chatkit.agents.ThreadItemConverter) translates stored thread items into model-ready input items. The default converter understands common ChatKit item types such as messages, widgets, workflows, and tasks. + +You can override the converter when you need custom behavior. For example: + +- Formatting attachments for the model +- Translating tags or mentions into structured input +- Summarizing rich widgets into text the model can consume + +Custom conversion is typically paired with prompting so the model receives a coherent representation of the conversation. 
## Related guides - [Persist ChatKit threads and messages](../guides/persist-chatkit-data.md) -- [Save thread titles](../guides/add-features/save-thread-titles.md) -- [Disable new messages for a thread](../guides/add-features/disable-new-messages.md) +- [Compose model inputs](../guides/compose-model-input.md) +- [Add annotations in assistant messages](../guides/add-annotations.md) +- [Allow @-mentions in user messages](../guides/allow-mentions.md) +- [Handle feedback](../guides/handle-feedback.md) +- [Let users browse past threads](../guides/browse-past-threads.md) diff --git a/docs/concepts/widgets.md b/docs/concepts/widgets.md index f0aa72f..8740266 100644 --- a/docs/concepts/widgets.md +++ b/docs/concepts/widgets.md @@ -31,7 +31,7 @@ Every widget must be wrapped in a root-level container element. For single, self ## .widget files -Exported `.widget` files are JSON blobs that include the widget template, the expected data schema, and supporting metadata. You can load them server-side and render widgets dynamically with `WidgetTemplate`; see [Build widgets with `WidgetTemplate`](../guides/add-features/stream-widgets.md#build-widgets-with-widgettemplate) for examples. +Exported `.widget` files are JSON blobs that include the widget template, the expected data schema, and supporting metadata. You can load them server-side and render widgets dynamically with `WidgetTemplate`; see [Build widgets with `WidgetTemplate`](../guides/stream-widgets.md#build-widgets-with-widgettemplate) for examples. 
## WidgetItem @@ -51,6 +51,6 @@ The [`entities.onRequestPreview`](https://openai.github.io/chatkit-js/api/openai ## Related guides -- [Stream widgets](../guides/add-features/stream-widgets.md) -- [Create custom forms](../guides/add-features/create-custom-forms.md) -- [Handle widget actions](../guides/add-features/handle-widget-actions.md) +- [Stream widgets](../guides/stream-widgets.md) +- [Create custom forms](../guides/create-custom-forms.md) +- [Handle widget actions](../guides/handle-widget-actions.md) diff --git a/docs/guides/add-features/accept-attachments.md b/docs/guides/accept-attachments.md similarity index 100% rename from docs/guides/add-features/accept-attachments.md rename to docs/guides/accept-attachments.md diff --git a/docs/guides/add-features/add-annotations.md b/docs/guides/add-annotations.md similarity index 100% rename from docs/guides/add-features/add-annotations.md rename to docs/guides/add-annotations.md diff --git a/docs/guides/add-features/disable-new-messages.md b/docs/guides/add-features/disable-new-messages.md deleted file mode 100644 index e3f66cf..0000000 --- a/docs/guides/add-features/disable-new-messages.md +++ /dev/null @@ -1,50 +0,0 @@ -# Disable new messages for a thread - -There are two ways to stop new user messages: temporarily lock a thread or permanently close it when the conversation is finished. - -| State | When to use | Input UI | What the user sees | -|---------|------------------------------------------------|------------------------------------------------|--------------------| -| Locked | Temporary pause for moderation or admin action | Composer stays on screen but is disabled; the placeholder shows the lock reason. | The reason for the lock in the disabled composer. | -| Closed | Final state when the conversation is done | The input UI is replaced with an informational banner. | A static default message or a custom reason, if provided. 
| - -## Update thread status (lock, close, or re-open) - -Update `thread.status`—whether moving between active, locked, or closed—and persist it. - -```python -from chatkit.types import ActiveStatus, LockedStatus, ClosedStatus - -# lock -thread.status = LockedStatus(reason="Escalated to support.") -await store.save_thread(thread, context=context) - -# close (final) -thread.status = ClosedStatus(reason="Resolved.") -await store.save_thread(thread, context=context) - -# re-open -thread.status = ActiveStatus() -await store.save_thread(thread, context=context) -``` - -If you update the thread status within the `respond` method, ChatKit will emit a `ThreadUpdatedEvent` so connected clients update immediately. - -You can also update the thread status from a custom client-facing endpoint that updates the store directly (outside of the ChatKit server request flow). If the user is currently viewing the thread, have the client call `chatkit.fetchUpdates()` after the status is persisted so the UI picks up the latest thread state. - -## Block server-side work when locked or closed - -Thread status only affects the composer UI; ChatKitServer does not automatically reject actions, tool calls, or imperative message adds. Your integration should short-circuit handlers when a thread is disabled: - -```python -class MyChatKitServer(...): - async def respond(thread, input_user_message, context): - if thread.status.type in {"locked", "closed"}: - return - # normal processing - - async def action(thread, action, sender, context): - if thread.status.type in {"locked", "closed"}: - return - # normal processing -``` - diff --git a/docs/guides/add-features/save-thread-titles.md b/docs/guides/add-features/save-thread-titles.md deleted file mode 100644 index 2c190bc..0000000 --- a/docs/guides/add-features/save-thread-titles.md +++ /dev/null @@ -1,49 +0,0 @@ -# Save thread titles - -Threads start untitled. Give them short titles so inboxes and client lists stay readable. 
- -## Save a title - -Update `thread.title` and call `store.save_thread(thread, context=...)`. Do this inside your streaming pipeline so ChatKit can emit the resulting `ThreadUpdatedEvent` to connected clients. - -```python -class MyChatKitServer(ChatKitServer[RequestContext]): - async def respond(...): - ... - if not thread.title: - thread.title = "My Thread Title" - await self.store.save_thread(thread, context=context) -``` - -If your integration writes titles elsewhere (for example, a separate FastAPI route that calls `store.save_thread` directly), have the client call `chatkit.fetchUpdates()` command afterward to pull the latest thread metadata. - - -## Auto-generate a title - -Generate a concise title after the first assistant turn once you have enough context. Skip if the thread already has a title or if there isn’t enough content to summarize. - -```python -class MyChatKitServer(ChatKitServer[RequestContext]): - async def respond(...): - updating_thread_title = asyncio.create_task( - self._maybe_update_thread_title(thread, item, context) - ) - - # Stream your main response - async for event in stream_agent_response(agent_context, result): - yield event - - # Await so the title update streams back as a ThreadUpdatedEvent - await updating_thread_title - - async def _maybe_update_thread_title(self, thread: ThreadMetadata, context: RequestContext): - if thread.title is not None: - return - items = await self.store.load_thread_items( - thread.id, after=None, limit=6, order="desc", context=context - ) - thread.title = await generate_short_title(items.data) # your model call - await self.store.save_thread(thread, context=context) -``` - -Use any model call you like for `generate_short_title`: run a tiny Agent, a simple completion, or your own heuristic. Keep titles brief (for example, 3–6 words). 
diff --git a/docs/guides/add-features/allow-mentions.md b/docs/guides/allow-mentions.md similarity index 100% rename from docs/guides/add-features/allow-mentions.md rename to docs/guides/allow-mentions.md diff --git a/docs/guides/browse-past-threads.md b/docs/guides/browse-past-threads.md new file mode 100644 index 0000000..1f519d2 --- /dev/null +++ b/docs/guides/browse-past-threads.md @@ -0,0 +1,139 @@ +# Let users browse past threads + +Let users return to previous conversations, see readable titles in a history list, and decide which threads can be continued. + +## Enable thread history in the client + +The ChatKit React hooks support a built-in history view that lists past threads. History is enabled by default, but you can configure it explicitly when you create your ChatKit controller: + +```tsx +const chatkit = useChatKit({ + // ... + history: { + enabled: true, + showDelete: true, + showRename: true, + }, +}); +``` + +With `history.enabled: true`, ChatKit.js will: + +- Fetch threads from your ChatKit server. +- Show them in a history list using `thread.title` when available. +- Let users click a past thread to load its items and continue the conversation. +- Let users delete and rename threads. + +Set `history.enabled: false` if you want a single-thread, stateless chat experience with no history UI. + +## Show readable titles in history + +Threads start untitled. Give them short, descriptive titles so the history list is easy to scan. + +### Set a title directly + +Set `thread.title` on the server and persist it with your store: + +```python +from chatkit.server import ChatKitServer + + +class MyChatKitServer(ChatKitServer[RequestContext]): + async def respond(...): + ... + if not thread.title: + thread.title = "Order #1234" + await self.store.save_thread(thread, context=context) +``` + +ChatKit will emit a `ThreadUpdatedEvent` so connected clients update the title in their history views. 
+ +### Auto-generate a title after the first turn + +Generate a concise title after the first assistant turn once you have enough context. Skip if the thread already has a title or if there isn’t enough content to summarize. + +```python +class MyChatKitServer(ChatKitServer[RequestContext]): + async def respond(...): + updating_thread_title = asyncio.create_task( + self._maybe_update_thread_title(thread, context) + ) + + # Stream your main response + async for event in stream_agent_response(agent_context, result): + yield event + + # Await so the title update streams back as a ThreadUpdatedEvent + await updating_thread_title + + async def _maybe_update_thread_title( + self, thread: ThreadMetadata, context: RequestContext + ) -> None: + if thread.title is not None: + return + items = await self.store.load_thread_items( + thread.id, + after=None, + limit=6, + order="desc", + context=context, + ) + thread.title = await generate_short_title(items.data) # your model call + await self.store.save_thread(thread, context=context) +``` + +Use any model call you like for `generate_short_title`: run a tiny Agent, a simple completion, or your own heuristic. Keep titles brief (for example, 3–6 words). + +## Decide which threads can be continued + +By default, users can continue any past thread: selecting it in the history view loads its items and reuses the same thread when they send a new message. + +Use `thread.status` to mark conversations that should no longer accept new messages. Locked and closed threads still appear in history, but the composer UI changes. + +There are two ways to stop new user messages: temporarily lock a thread or permanently close it when the conversation is finished. 
+ +| State | When to use | Input UI | What the user sees | +|---------|------------------------------------------------|------------------------------------------------|--------------------| +| Locked | Temporary pause for moderation or admin action | Composer stays on screen but is disabled; the placeholder shows the lock reason. | The reason for the lock in the disabled composer. | +| Closed | Final state when the conversation is done | The input UI is replaced with an informational banner. | A static default message or a custom reason, if provided. | + +### Update thread status (lock, close, or re-open) + + +```python +from chatkit.types import ActiveStatus, LockedStatus, ClosedStatus + +# lock (temporary pause) +thread.status = LockedStatus(reason="Escalated to support.") +await store.save_thread(thread, context=context) + +# close (final state) +thread.status = ClosedStatus(reason="Resolved.") +await store.save_thread(thread, context=context) + +# re-open +thread.status = ActiveStatus() +await store.save_thread(thread, context=context) +``` + +When you persist a new status during `respond`, ChatKit emits a `ThreadUpdatedEvent` so all viewers see the updated state. + +You can also update the thread status from a custom client-facing endpoint that updates the store directly (outside of the ChatKit server request flow). If the user is currently viewing the thread, have the client call `chatkit.fetchUpdates()` after the status is persisted so the UI picks up the latest thread state. + +### Block server-side work when locked or closed + +Thread status only affects the composer UI; `ChatKitServer` does not automatically reject actions, tool calls, or imperative message adds. 
Your integration should short-circuit handlers when a thread is disabled: + +```python +class MyChatKitServer(...): + async def respond(self, thread, input_user_message, context): + if thread.status.type in {"locked", "closed"}: + return + # normal processing + + async def action(self, thread, action, sender, context): + if thread.status.type in {"locked", "closed"}: + return + # normal processing +``` + diff --git a/docs/guides/compose-model-input.md b/docs/guides/compose-model-input.md index 9f8fa23..e329f9d 100644 --- a/docs/guides/compose-model-input.md +++ b/docs/guides/compose-model-input.md @@ -1,89 +1,3 @@ # Compose model inputs -ChatKit delivers structured thread items (messages, tools, attachments). Before running inference, convert those items into the model's expected input format. - -## Load recent thread items - -Make the agent aware of recent context before converting input. Load recent thread items and pass them along with the new message so the model sees the conversation state. - -```python -# Inside ChatKitServer.respond(...) -items_page = await self.store.load_thread_items( - thread.id, - after=None, - limit=20, - order="desc", - context=context, -) -items = list(reversed(items_page.data)) -``` - -## Use default conversion helpers - -Start with the defaults: `simple_to_agent_input` converts a `UserMessageItem` into Agents SDK inputs, and `ThreadItemConverter` lets you override specific conversions when you need more control. Combine the converted user input with the `items` you loaded above to send the model both the latest message and recent thread context. - -```python -from agents import Agent, Runner -from chatkit.agents import AgentContext, simple_to_agent_input, stream_agent_response - - -async def respond( - self, - thread: ThreadMetadata, - input: UserMessageItem | None, - context: Any, -) -> AsyncIterator[ThreadStreamEvent]: - # Assume `items` was loaded as shown in the previous section. 
- - input_items = await simple_to_agent_input(items) - agent_context = AgentContext(thread=thread, store=self.store, request_context=context) - result = Runner.run_streamed( - assistant_agent, - input_items, - context=agent_context, - ) -``` - -See [Stream thread events](stream-thread-events.md) for how to stream the resulting events back to the client. - -## Customize a ThreadItemConverter - -Extend `ThreadItemConverter` when the defaults do not match your agent instructions (e.g. your prompt expects special tags around hidden context or tasks) or when you persist items the simple converter does not cover, such as @-mentions (entity tagging) or attachments. The example below wraps hidden context in a dedicated system message so the model treats it as internal-only guidance. - -```python -class MyConverter(ThreadItemConverter): - async def hidden_context_to_input( - self, item: HiddenContextItem - ) -> Message: - text = ( - "DO NOT SHOW TO USER. Internal context for the assistant:\n" - f"\n{item.content}\n" - ) - return Message( - type="message", - role="system", - content=[ - ResponseInputTextParam( - type="input_text", - text=text, - ) - ], - ) -``` - -You can also override methods like `attachment_to_message_content` or `tag_to_message_content` to translate @-mentions or attachments into model-readable text. - -## Interpret inference options - -When you have specified composer options for tools or models in ChatKit.js, user-selected model or tool settings arrive as `input.inference_options`. Pass them through to your model runner—or even switch which agent you invoke—so the experience follows the user's choices. 
- -```python -if input and input.inference_options: - model = input.inference_options.model - tool_choice = input.inference_options.tool_choice - # forward these into your inference call -``` - -## Next - -[Run inference](run-inference.md) \ No newline at end of file +This guide has been consolidated into [Respond to a user message](respond-to-user-message.md). diff --git a/docs/guides/add-features/create-custom-forms.md b/docs/guides/create-custom-forms.md similarity index 100% rename from docs/guides/add-features/create-custom-forms.md rename to docs/guides/create-custom-forms.md diff --git a/docs/guides/add-features/handle-feedback.md b/docs/guides/handle-feedback.md similarity index 100% rename from docs/guides/add-features/handle-feedback.md rename to docs/guides/handle-feedback.md diff --git a/docs/guides/add-features/handle-widget-actions.md b/docs/guides/handle-widget-actions.md similarity index 100% rename from docs/guides/add-features/handle-widget-actions.md rename to docs/guides/handle-widget-actions.md diff --git a/docs/guides/add-features/make-client-tool-calls.md b/docs/guides/make-client-tool-calls.md similarity index 100% rename from docs/guides/add-features/make-client-tool-calls.md rename to docs/guides/make-client-tool-calls.md diff --git a/docs/guides/persist-chatkit-data.md b/docs/guides/persist-chatkit-data.md index c1b51f6..3421480 100644 --- a/docs/guides/persist-chatkit-data.md +++ b/docs/guides/persist-chatkit-data.md @@ -1,117 +1,3 @@ # Persist ChatKit threads and messages -Implement the `Store` interface to control how threads, messages, tool calls, and widgets are stored. Prefer serializing thread items as JSON so schema changes in future releases do not break your storage. 
- -## Implement a Store - -Example `Store` backed by Postgres and `psycopg`: - -```python -class MyPostgresStore(Store[RequestContext]): - """Chat data store backed by Postgres.""" - - def __init__(self, conninfo: str) -> None: - self._conninfo = conninfo - self._init_schema() - - @contextmanager - def _connection(self) -> Iterator[psycopg.Connection]: - # Uses blocking psycopg for simplicity. - # In production async servers, consider an async driver or connection pool. - with psycopg.connect(self._conninfo) as conn: - yield conn - - def _init_schema(self) -> None: - with self._connection() as conn, conn.cursor() as cur: - # Threads are typically queried by (user_id, created_at), - # so you may want to add an index on those columns in production. - cur.execute( - """ - CREATE TABLE IF NOT EXISTS threads ( - id TEXT PRIMARY KEY, - user_id TEXT NOT NULL, - created_at TIMESTAMPTZ NOT NULL, - data JSONB NOT NULL - ); - """ - ) - - # Items are typically streamed by (thread_id, created_at) and - # sometimes filtered by user_id, so add indexes accordingly in production. 
- cur.execute( - """ - CREATE TABLE IF NOT EXISTS items ( - id TEXT PRIMARY KEY, - thread_id TEXT NOT NULL - REFERENCES threads (id) - ON DELETE CASCADE, - user_id TEXT NOT NULL, - created_at TIMESTAMPTZ NOT NULL, - data JSONB NOT NULL - ); - """ - ) - - conn.commit() - - async def load_thread( - self, thread_id: str, context: RequestContext - ) -> ThreadMetadata: - with self._connection() as conn, conn.cursor(row_factory=tuple_row) as cur: - cur.execute( - "SELECT data FROM threads WHERE id = %s AND user_id = %s", - (thread_id, context.user_id), - ) - row = cur.fetchone() - if row is None: - raise NotFoundError(f"Thread {thread_id} not found") - - return ThreadMetadata.model_validate(row[0]) - - async def save_thread( - self, thread: ThreadMetadata, context: RequestContext - ) -> None: - payload = thread.model_dump(mode="json") - - with self._connection() as conn, conn.cursor() as cur: - cur.execute( - """ - INSERT INTO threads (id, user_id, created_at, data) - VALUES (%s, %s, %s, %s) - """, - (thread.id, context.user_id, thread.created_at, payload), - ) - conn.commit() - - # Remaining Store methods follow the same pattern -``` - -See the [`Store` interface](../../api/chatkit/store/#chatkit.store.Store) for the full list of required methods. - -### Customize ID generation - -If you need custom thread or item IDs you can override the store's ID generation methods `generate_thread_id` and `generate_item_id`. - -This is useful when integrating with an external ID system, enforcing a specific ID format, or requiring deterministic or cross-service–unique IDs. - -For most applications, the default implementations are sufficient. - -### Store thread metadata - -`ThreadMetadata` can hold arbitrary, non-UI data needed for your application such as the last `previous_response_id` or customer identifiers. 
- -```python -previous_response_id = thread.metadata.get("previous_response_id") - -result = Runner.run_streamed( - agent, - input=..., - previous_response_id=previous_response_id, -) - -thread.metadata["previous_response_id"] = result.response_id -``` - -## Next - -[Compose model input](compose-model-input.md) \ No newline at end of file +This guide has been consolidated into [Respond to a user message](respond-to-user-message.md). diff --git a/docs/guides/respond-to-user-message.md b/docs/guides/respond-to-user-message.md new file mode 100644 index 0000000..f124f0c --- /dev/null +++ b/docs/guides/respond-to-user-message.md @@ -0,0 +1,280 @@ +# Respond to a user message + +This guide covers how to implement and run a ChatKit server that responds to user messages, including thread loading, inference, event streaming, and persistence. + +## Install ChatKit + +Install the SDK from PyPI: + +```bash +pip install openai-chatkit +``` + +## Build and run your ChatKit server + +Your ChatKit server does three main things: + +1. Accept HTTP requests from your client. +2. Construct a request context (user id, auth, feature flags, etc.). +3. Call `ChatKitServer.respond` to produce streamed events. + +### Define a request context + +First, define a small context object that will be created per request and passed through your server, store, and agents: + +```python +from dataclasses import dataclass + + +@dataclass +class MyRequestContext: + user_id: str +``` + +### Implement your `ChatKitServer` + +Subclass `ChatKitServer` and implement `respond`. It runs once per user turn and should yield the events that make up your response. We’ll keep this example simple for now and fill in history loading and model calls in later sections. 
+ +```python +from collections.abc import AsyncIterator +from datetime import datetime + +from chatkit.server import ChatKitServer +from chatkit.types import ( + AssistantMessageContent, + AssistantMessageItem, + ThreadItemDoneEvent, + ThreadMetadata, + ThreadStreamEvent, + UserMessageItem, +) + + +class MyChatKitServer(ChatKitServer[MyRequestContext]): + async def respond( + self, + thread: ThreadMetadata, + input: UserMessageItem | None, + context: MyRequestContext, + ) -> AsyncIterator[ThreadStreamEvent]: + # Replace this with your inference pipeline. + yield ThreadItemDoneEvent( + item=AssistantMessageItem( + thread_id=thread.id, + id=self.store.generate_item_id("message", thread, context), + created_at=datetime.now(), + content=[AssistantMessageContent(text="Hi there!")], + ) + ) +``` + +### Wire ChatKit to your web framework + +Expose a single `/chatkit` endpoint that forwards requests to your `MyChatKitServer` instance. For example, with FastAPI: + +```python +from fastapi import FastAPI, Request, Response +from fastapi.responses import StreamingResponse + +from chatkit.server import ChatKitServer, StreamingResult + +app = FastAPI() +store = MyPostgresStore(conn_info) +server = MyChatKitServer(store) + + +@app.post("/chatkit") +async def chatkit_endpoint(request: Request): + # Build a per-request context from the incoming HTTP request. + context = MyRequestContext(user_id="abc123") + + # Let ChatKit handle the request and return either a streaming or JSON result. + result = await server.process(await request.body(), context) + if isinstance(result, StreamingResult): + return StreamingResponse(result, media_type="text/event-stream") + return Response(content=result.json, media_type="application/json") +``` + +### How request context flows into ChatKit + +`ChatKitServer[TContext]` and `Store[TContext]` are generic over a request context type you choose (user id, org, auth scopes, feature flags). 
Construct it per request and pass it to `server.process`; it flows into `respond` and your store methods. + +```python +context = MyRequestContext(user_id="abc123") +result = await server.process(await request.body(), context) +``` + +Request metadata in the payload is available before calling `process`; include it in your context for auth, tracing, or feature flags. + +## Implement your ChatKit data store + +Implement the `Store` interface to control how threads, messages, tool calls, and widgets are stored. Prefer serializing thread items as JSON so schema changes do not break storage. Example Postgres store: + +```python +class MyPostgresStore(Store[RequestContext]): + def __init__(self, conninfo: str) -> None: + self._conninfo = conninfo + self._init_schema() + + def _init_schema(self) -> None: + with self._connection() as conn, conn.cursor() as cur: + cur.execute( + """ + CREATE TABLE IF NOT EXISTS threads ( + id TEXT PRIMARY KEY, + user_id TEXT NOT NULL, + created_at TIMESTAMPTZ NOT NULL, + data JSONB NOT NULL + ); + """ + ) + + cur.execute( + """ + CREATE TABLE IF NOT EXISTS items ( + id TEXT PRIMARY KEY, + thread_id TEXT NOT NULL + REFERENCES threads (id) + ON DELETE CASCADE, + user_id TEXT NOT NULL, + created_at TIMESTAMPTZ NOT NULL, + data JSONB NOT NULL + ); + """ + ) + + conn.commit() + + async def load_thread( + self, thread_id: str, context: RequestContext + ) -> ThreadMetadata: + with self._connection() as conn, conn.cursor(row_factory=tuple_row) as cur: + cur.execute( + "SELECT data FROM threads WHERE id = %s AND user_id = %s", + (thread_id, context.user_id), + ) + row = cur.fetchone() + if row is None: + raise NotFoundError(f"Thread {thread_id} not found") + return ThreadMetadata.model_validate(row[0]) + + async def save_thread( + self, thread: ThreadMetadata, context: RequestContext + ) -> None: + payload = thread.model_dump(mode="json") + with self._connection() as conn, conn.cursor() as cur: + cur.execute( + """ + INSERT INTO threads (id, 
user_id, created_at, data) + VALUES (%s, %s, %s, %s) + """, + (thread.id, context.user_id, thread.created_at, payload), + ) + conn.commit() + + # Implement the remaining Store methods following the same pattern. +``` + +Customize ID generation by overriding `generate_thread_id` and `generate_item_id` if you need external or deterministic IDs. Store metadata such as `previous_response_id` on `ThreadMetadata` to drive your inference pipeline. + +## Generate a response using your model + +Inside `respond`, you’ll usually: + +1. Load recent thread history. +2. Prepare model input for your agent. +3. Run inference and stream events back to the client. + +### Load thread history inside `respond` + +Fetch recent items so the model sees the conversation state before you build the next turn: + +```python +items_page = await self.store.load_thread_items( + thread.id, + after=None, + limit=20, # Tune this limit based on your model/context budget. + order="desc", + context=context, +) +items = list(reversed(items_page.data)) +``` + +### Prepare model input + +Use the defaults first: `simple_to_agent_input` converts user items into Agents SDK inputs, and `ThreadItemConverter` handles other item types. Override converter methods if you need special handling for hidden context, attachments, or tags. + +Respect any `input.inference_options` the client sends (model, tool choice, etc.) when you build your request to the model. + +```python +from agents import Runner +from chatkit.agents import AgentContext, simple_to_agent_input + +input_items = await simple_to_agent_input(items) +agent_context = AgentContext( + thread=thread, + store=self.store, + request_context=context, +) +``` + +### Run inference and stream events + +Run your agent and stream events back to the client. `stream_agent_response` converts an Agents run into ChatKit events; you can also yield events manually. 
+ +```python +from agents import ( + InputGuardrailTripwireTriggered, + OutputGuardrailTripwireTriggered, + Runner, +) +from chatkit.agents import stream_agent_response +from chatkit.types import ErrorEvent + +result = Runner.run_streamed( + assistant_agent, + input_items, + context=agent_context, +) + +try: + async for event in stream_agent_response(agent_context, result): + yield event +except InputGuardrailTripwireTriggered: + yield ErrorEvent(message="We blocked that message for safety.") +except OutputGuardrailTripwireTriggered: + yield ErrorEvent( + message="The assistant response was blocked.", + allow_retry=False, + ) +``` + +To stream events from a server tool during the same turn, use `ctx.context.stream(...)` inside the tool: + +```python +from agents import RunContextWrapper, function_tool +from chatkit.agents import AgentContext +from chatkit.types import ProgressUpdateEvent + + +@function_tool() +async def load_document(ctx: RunContextWrapper[AgentContext], document_id: str): + await ctx.context.stream(ProgressUpdateEvent(icon="document", text="Loading document...")) + return await get_document_by_id(document_id) +``` + +`stream_agent_response` will forward these events alongside any assistant text or tool call updates. Client tool calls are also supported via `ctx.context.client_tool_call` when you register the tool on both client and server. 
+ +## Next: add features + +- [Let users browse past threads](browse-past-threads.md) +- [Accept attachments](accept-attachments.md) +- [Make client tool calls](make-client-tool-calls.md) +- [Send client effects](send-client-effects.md) +- [Show progress for long-running tools](show-progress-for-long-running-tools.md) +- [Stream widgets](stream-widgets.md) +- [Handle widget actions](handle-widget-actions.md) +- [Create custom forms](create-custom-forms.md) +- [Handle feedback](handle-feedback.md) +- [Allow @-mentions in user messages](allow-mentions.md) +- [Add annotations in assistant messages](add-annotations.md) diff --git a/docs/guides/run-inference.md b/docs/guides/run-inference.md index 258a07d..a72703c 100644 --- a/docs/guides/run-inference.md +++ b/docs/guides/run-inference.md @@ -1,71 +1,3 @@ # Run inference -The Agents SDK is the officially supported way to run inference with ChatKit and stream results back, but it is not mandatory. Any pipeline that yields `ThreadStreamEvent`s will work. - -If you are not using Agents SDK, emit `ThreadStreamEvent`s yourself from `respond`. Assistant messages, tool status, notices, and widgets are all first-class events; see [Stream thread events](stream-thread-events.md) for patterns. - -## Access ChatKit helpers inside tools - -`AgentContext` is passed through to server tool calls (via `RunContextWrapper`) so tools can stream events or use the store. For example, use `ctx.context.stream(...)` to update the UI while a tool runs (more details in [Stream thread events](stream-thread-events.md)), or `ctx.context.store` to load or persist thread data during tool execution. - -Attach server tools to your agent as usual; each tool receives the same `AgentContext` you constructed before running inference, giving it access to the current thread, store, and request context. - -You can subclass `AgentContext` to add app-specific context that tools and agents can use directly, such as a separate data store. 
- -```python -class MyAgentContext(AgentContext[RequestContext]): - data_store: MyDataStore - analytics: AnalyticsClient - - -async def respond(...): - agent_context = MyAgentContext( - thread=thread, - store=self.store, # your ChatKit data store - request_context=context, # your ChatKit request context (headers, auth) - data_store=self.data_store, # example addition: app-specific store - analytics=self.analytics, # example addition: app-specific service - ) - result = Runner.run_streamed( - assistant_agent, - input_items, - context=agent_context, - ) -``` - -Tools now receive `ctx.context` typed as `MyAgentContext`, so they can read or write app state without extra plumbing. - -## Client tool calls - -Client tool calls mirror server tool calls, except they seamlessly invoke a ChatKit.js client callback you registered on the frontend while inference runs. Trigger one by setting `ctx.context.client_tool_call` inside a tool and registering the same tool on both client and server. - -Only one client tool call is allowed per turn, and the agent must stop at the tool before continuing. See also [Use client tool calls](add-features/use-client-tool-calls.md). - -```python -@function_tool(description_override="Add an item to the user's todo list.") -async def add_to_todo_list(ctx: RunContextWrapper[AgentContext], item: str) -> None: - ctx.context.client_tool_call = ClientToolCall( - name="add_to_todo_list", - arguments={"item": item}, - ) - -assistant_agent = Agent[AgentContext]( - model="gpt-5", - name="Assistant", - instructions="You are a helpful assistant", - tools=[add_to_todo_list], - tool_use_behavior=StopAtTools(stop_at_tool_names=[add_to_todo_list.name]), -) -``` - -## Send agent reference content - -You can supply additional reference context to the model at inference time using server tools, client tools, or manual input injection. Choose the mechanism based on where the data lives and who owns it. 
- -- Use server tools when the reference content lives on the backend and can be retrieved during inference. -- Use client tool calls when the browser or app must supply transient state (for example, active UI selections). -- Manually inject additional model input items when the reference content is already available at inference time and your application is latency-sensitive. - -## Next - -[Stream responses back to your user](stream-thread-events.md) \ No newline at end of file +This guide has been consolidated into [Respond to a user message](respond-to-user-message.md). diff --git a/docs/guides/add-features/send-client-effects.md b/docs/guides/send-client-effects.md similarity index 100% rename from docs/guides/add-features/send-client-effects.md rename to docs/guides/send-client-effects.md diff --git a/docs/guides/serve-chatkit.md b/docs/guides/serve-chatkit.md index 079fb31..be82fd7 100644 --- a/docs/guides/serve-chatkit.md +++ b/docs/guides/serve-chatkit.md @@ -1,98 +1,3 @@ # Serve ChatKit from your backend -ChatKit's server integration is intentionally small: implement a `ChatKitServer`, wire up a single POST endpoint, and stream `ThreadStreamEvent`s back to the client. You decide where to run the server and how to authenticate requests. - -## Install the SDK - -Install the `openai-chatkit` package: - -```bash -pip install openai-chatkit -``` - -## Implement a ChatKit server - -Subclass `ChatKitServer` and implement `respond`. This method runs every time a user sends a message and should stream back the events that make up your response (assistant messages, tool calls, workflows, tasks, widgets, and so on). 
- -```python -from collections.abc import AsyncIterator -from dataclasses import dataclass -from datetime import datetime - -from chatkit.server import ChatKitServer -from chatkit.types import ( - AssistantMessageContent, - AssistantMessageItem, - ThreadItemDoneEvent, - ThreadMetadata, - ThreadStreamEvent, - UserMessageItem, -) - - -@dataclass -class MyRequestContext: - user_id: str - - -class MyChatKitServer(ChatKitServer[MyRequestContext]): - async def respond( - self, - thread: ThreadMetadata, - input: UserMessageItem | None, - context: MyRequestContext, - ) -> AsyncIterator[ThreadStreamEvent]: - # Replace this with your inference pipeline. - yield ThreadItemDoneEvent( - item=AssistantMessageItem( - thread_id=thread.id, - id=self.store.generate_item_id("message", thread, context), - created_at=datetime.now(), - content=[AssistantMessageContent(text="Hi there!")], - ) - ) -``` - -## Pass request context into ChatKit - -`ChatKitServer[TContext]` and `Store[TContext]` are generic over a request context type you choose. Your context carries caller-specific data (for example user id, org, auth scopes, feature flags) into `ChatKitServer.respond` and your `Store`. Define a lightweight type and pass it through when you call `server.process`. - -```python -context = MyRequestContext(user_id="abc123") -result = await server.process(await request.body(), context) -``` - -## Expose the ChatKit endpoint - -ChatKit is framework-agnostic. Expose a single POST endpoint that returns JSON or streams server‑sent events (SSE). - -Example using ChatKit with FastAPI: - -```python -from fastapi import FastAPI, Request, Response -from fastapi.responses import StreamingResponse -from chatkit.server import ChatKitServer, StreamingResult - -app = FastAPI() -data_store = MyPostgresStore(conn_info) -server = MyChatKitServer(data_store) - - -@app.post("/chatkit") -async def chatkit_endpoint(request: Request): - context = MyRequestContext(...) 
- result = await server.process(await request.body(), context) - if isinstance(result, StreamingResult): - return StreamingResponse(result, media_type="text/event-stream") - return Response(content=result.json, media_type="application/json") -``` - -### (Optional) Pass through request metadata - -Every ChatKit request payload includes a `metadata` field you can use to carry per-request context from the client. - -Pull it from the request in your endpoint before calling server.process to use it for auth/tracing/business logic there, or to include it in the context you pass through so respond and tools can read it. - -## Next - -[Persist ChatKit threads and messages](persist-chatkit-data.md) \ No newline at end of file +This guide has been consolidated into [Respond to a user message](respond-to-user-message.md). diff --git a/docs/guides/add-features/show-progress-for-long-running-tools.md b/docs/guides/show-progress-for-long-running-tools.md similarity index 100% rename from docs/guides/add-features/show-progress-for-long-running-tools.md rename to docs/guides/show-progress-for-long-running-tools.md diff --git a/docs/guides/stream-thread-events.md b/docs/guides/stream-thread-events.md index 3adf9be..a6bb6c0 100644 --- a/docs/guides/stream-thread-events.md +++ b/docs/guides/stream-thread-events.md @@ -1,104 +1,3 @@ # Stream responses back to your user -ChatKit.js listens for [`ThreadStreamEvent`](../../api/chatkit/types/#chatkit.types.ThreadStreamEvent)s over SSE. Stream events from [`ChatKitServer.respond`](../../api/chatkit/server/#chatkit.server.ChatKitServer.respond) so users see model output, tool activity, progress updates, and errors in real time. - -Thread stream events include both persistent thread items (messages, tools, workflows) that are saved to the conversation history, and non-persistent runtime signals (progress updates, notices, errors, and client effects) that show ephemeral UI or drive immediate client behavior without being stored. 
- -See [Thread stream events](../concepts/thread-stream-events.md) for an overview of supported event types. - -### From `respond` - -`stream_agent_response` converts a streamed Agents SDK run into ChatKit events. Yield those events directly from `respond`, or yield any `ThreadStreamEvent` yourself—the server processes them the same way. - -Example using `stream_agent_response` with a run result: - -```python -class MyChatKitServer(ChatKitServer[MyRequestContext]): - async def respond(...) -> AsyncIterator[ThreadStreamEvent]: - # Build model inputs and agent context as shown in previous guides. - - result = Runner.run_streamed(...) - async for event in stream_agent_response(agent_context, result): - yield event -``` - -### From tools - -Server tools enqueue events with the `AgentContext` helpers; `stream_agent_response` drains and forwards them. - -Example emitting an ephemeral progress update event during a tool call: - -```python -@function_tool() -async def long_running_tool(ctx: RunContextWrapper[AgentContext]): - await ctx.context.stream(ProgressUpdateEvent(text="Working...")) - - # Tool logic omitted for brevity -``` - -### Handle guardrail triggers - -Guardrail tripwires raise `InputGuardrailTripwireTriggered` or `OutputGuardrailTripwireTriggered` once partial assistant output has been rolled back. Catch them around `stream_agent_response` and optionally send a user-facing event so the client knows why the turn stopped. 
- -```python -from agents import InputGuardrailTripwireTriggered, OutputGuardrailTripwireTriggered -from chatkit.types import ErrorEvent - -try: - async for event in stream_agent_response(agent_context, result): - yield event -except InputGuardrailTripwireTriggered: - yield ErrorEvent(message="We blocked that message for safety.") -except OutputGuardrailTripwireTriggered: - yield ErrorEvent( - message="The assistant response was blocked.", - allow_retry=False, - ) -``` - -## Stream events without `stream_agent_response` - -You can bypass the Agents SDK helper and yield `ThreadStreamEvent`s directly from `respond`. ChatKitServer will persist and route them the same way. - -```python -class MyChatKitServer(ChatKitServer[MyRequestContext]): - async def respond(...) -> AsyncIterator[ThreadStreamEvent]: - # Example transient progress update - yield ProgressUpdateEvent( - icon="search", - text="Searching..." - ) - - # Run your inference pipeline here - output = await run_inference(thread, input, context) - - # Stream a persisted assistant message - yield ThreadItemDoneEvent( - item=AssistantMessageItem( - thread_id=thread.id, - id=self.store.generate_item_id("message", thread, context), - created_at=datetime.now(), - content=[AssistantMessageContent(text=output)], - ) - ) -``` - -When you stream events manually, remember that tools cannot `yield` events. If you skip `stream_agent_response`, you must merge any tool-emitted events yourself—for example, by reading from `AgentContext._events` (populated by `ctx.context.stream(...)` or workflow helpers) and interleaving them with your own `respond` events. 
- - -## Next - -Add features: - -* [Save thread titles](add-features/save-thread-titles.md) -* [Accept attachments](add-features/accept-attachments.md) -* [Make client tool calls](add-features/make-client-tool-calls.md) -* [Send client effects](add-features/send-client-effects.md) -* [Show progress for long-running tools](add-features/show-progress-for-long-running-tools.md) -* [Stream widgets](add-features/stream-widgets.md) -* [Handle widget actionss](add-features/handle-widget-actions.md) -* [Create custom forms](add-features/create-custom-forms.md) -* [Handle feedback](add-features/handle-feedback.md) -* [Allow @-mentions in user messages](add-features/allow-mentions.md) -* [Add annotations in assistant messages](add-features/add-annotations.md) -* [Disable new messages for a thread](add-features/disable-new-messages.md) +This guide has been consolidated into [Respond to a user message](respond-to-user-message.md). diff --git a/docs/guides/add-features/stream-widgets.md b/docs/guides/stream-widgets.md similarity index 100% rename from docs/guides/add-features/stream-widgets.md rename to docs/guides/stream-widgets.md diff --git a/mkdocs.yml b/mkdocs.yml index c546eca..00e5080 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -5,6 +5,7 @@ theme: - content.code.copy - content.code.select - navigation.path + - navigation.sections - content.code.annotate palette: primary: black @@ -39,31 +40,24 @@ nav: - Home: index.md - Quick start: quickstart.md - Concepts: - - Threads: concepts/threads.md - - Thread items: concepts/thread-items.md + - Threads and items: concepts/threads.md - Thread stream events: concepts/thread-stream-events.md - Widgets: concepts/widgets.md - Actions: concepts/actions.md - Entities: concepts/entities.md - Guides: - - Serve ChatKit from your backend: guides/serve-chatkit.md - - Persist ChatKit threads and messages: guides/persist-chatkit-data.md - - Compose model input: guides/compose-model-input.md - - Run inference: guides/run-inference.md - - 
Stream responses back to your user: guides/stream-thread-events.md - - Add features: - - Save thread titles: guides/add-features/save-thread-titles.md - - Accept attachments: guides/add-features/accept-attachments.md - - Make client tool calls: guides/add-features/make-client-tool-calls.md - - Send client effects: guides/add-features/send-client-effects.md - - Show progress for long-running tools: guides/add-features/show-progress-for-long-running-tools.md - - Stream widgets: guides/add-features/stream-widgets.md - - Handle widget actions: guides/add-features/handle-widget-actions.md - - Create custom forms: guides/add-features/create-custom-forms.md - - Handle feedback: guides/add-features/handle-feedback.md - - Allow @-mentions in user messages: guides/add-features/allow-mentions.md - - Add annotations in assistant messages: guides/add-features/add-annotations.md - - Disable new messages for a thread: guides/add-features/disable-new-messages.md + - Respond to a user message: guides/respond-to-user-message.md + - Let users browse past threads: guides/browse-past-threads.md + - Accept attachments: guides/accept-attachments.md + - Make client tool calls: guides/make-client-tool-calls.md + - Send client effects: guides/send-client-effects.md + - Show progress for long-running tools: guides/show-progress-for-long-running-tools.md + - Stream widgets: guides/stream-widgets.md + - Handle widget actions: guides/handle-widget-actions.md + - Create custom forms: guides/create-custom-forms.md + - Handle feedback: guides/handle-feedback.md + - Allow @-mentions in user messages: guides/allow-mentions.md + - Add annotations in assistant messages: guides/add-annotations.md - API Reference: - Overview: api/chatkit/index.md - Modules: From df03dc0b6ac612bd24b460b3f3543c6ef7f69953 Mon Sep 17 00:00:00 2001 From: Jiwon Kim Date: Fri, 5 Dec 2025 02:02:37 -0800 Subject: [PATCH 3/4] more guides --- docs/concepts/actions.md | 2 +- docs/concepts/tools.md | 37 ++ docs/concepts/widgets.md | 
6 +- docs/guides/accept-attachments.md | 149 -------- docs/guides/accept-rich-user-input.md | 304 ++++++++++++++++ docs/guides/allow-mentions.md | 129 ------- ...uild-interactive-responses-with-widgets.md | 328 ++++++++++++++++++ docs/guides/compose-model-input.md | 3 - docs/guides/create-custom-forms.md | 69 ---- docs/guides/handle-widget-actions.md | 129 ------- .../keep-your-app-in-sync-with-chatkit.md | 77 ++++ .../guides/let-users-pick-tools-and-models.md | 138 ++++++++ .../let-your-app-draft-and-send-messages.md | 91 +++++ docs/guides/make-client-tool-calls.md | 57 --- .../pass-extra-app-context-to-your-model.md | 166 +++++++++ docs/guides/persist-chatkit-data.md | 3 - .../guides/prepare-your-app-for-production.md | 255 ++++++++++++++ docs/guides/respond-to-user-message.md | 10 +- docs/guides/run-inference.md | 3 - docs/guides/send-client-effects.md | 52 --- docs/guides/serve-chatkit.md | 3 - .../show-progress-for-long-running-tools.md | 104 ------ docs/guides/stream-thread-events.md | 3 - docs/guides/stream-widgets.md | 108 ------ docs/guides/update-client-during-response.md | 164 +++++++++ docs/index.md | 14 +- mkdocs.yml | 22 +- 27 files changed, 1590 insertions(+), 836 deletions(-) create mode 100644 docs/concepts/tools.md delete mode 100644 docs/guides/accept-attachments.md create mode 100644 docs/guides/accept-rich-user-input.md delete mode 100644 docs/guides/allow-mentions.md create mode 100644 docs/guides/build-interactive-responses-with-widgets.md delete mode 100644 docs/guides/compose-model-input.md delete mode 100644 docs/guides/create-custom-forms.md delete mode 100644 docs/guides/handle-widget-actions.md create mode 100644 docs/guides/keep-your-app-in-sync-with-chatkit.md create mode 100644 docs/guides/let-users-pick-tools-and-models.md create mode 100644 docs/guides/let-your-app-draft-and-send-messages.md delete mode 100644 docs/guides/make-client-tool-calls.md create mode 100644 docs/guides/pass-extra-app-context-to-your-model.md delete mode 
100644 docs/guides/persist-chatkit-data.md create mode 100644 docs/guides/prepare-your-app-for-production.md delete mode 100644 docs/guides/run-inference.md delete mode 100644 docs/guides/send-client-effects.md delete mode 100644 docs/guides/serve-chatkit.md delete mode 100644 docs/guides/show-progress-for-long-running-tools.md delete mode 100644 docs/guides/stream-thread-events.md delete mode 100644 docs/guides/stream-widgets.md create mode 100644 docs/guides/update-client-during-response.md diff --git a/docs/concepts/actions.md b/docs/concepts/actions.md index 9731e4e..c90816a 100644 --- a/docs/concepts/actions.md +++ b/docs/concepts/actions.md @@ -19,4 +19,4 @@ When you set `handler: "client"`, the action flows into the client SDK’s `widg Your client integration can also initiate actions directly with `chatkit.sendCustomAction(action, itemId?)`, optionally namespaced to a specific widget item. The server receives these in `ChatKitServer.action` just like a widget-triggered action and can stream widgets, messages, or client effects in response. This pattern is useful when a flow starts outside a widget—or after a client-handled action—but you still want the server to persist results or involve the model. ## Related guides -- [Handle widget actions](../guides/handle-widget-actions.md) +- [Build interactive responses with widgets](../guides/build-interactive-responses-with-widgets.md) diff --git a/docs/concepts/tools.md b/docs/concepts/tools.md new file mode 100644 index 0000000..de9898b --- /dev/null +++ b/docs/concepts/tools.md @@ -0,0 +1,37 @@ +# Tools + +Tools let the assistant call into your application logic during a turn—for example to search data, run a workflow, or fetch the user’s current context—then feed the results back into the model. + +At a high level: + +- **Server tools** run on your backend. The assistant calls them through your inference pipeline, and you stream their results back into the conversation. 
+- **Client tools** run in the browser or host app. ChatKit surfaces a tool call as a streamed thread item, lets the client handle it, then resumes the conversation with the tool’s output. + +## Server tools + +Server tools are ordinary Python functions you register with your inference setup (for example, as tools on an agent or as explicit steps in your pipeline). During inference, the model can decide to call them; ChatKit serializes the call, runs your function, and feeds the output back to the model. + +Use server tools to: + +- Look up data in your own APIs or databases. +- Kick off long-running jobs while streaming progress updates. +- Update your own domain state (tickets, orders, files, etc.) in response to a turn. + +From the model’s perspective, tools are structured, named capabilities it can invoke instead of guessing from free text. + +## Client tools + +Some operations can only run on the client—for example: + +- Reading the current selection in a canvas or document. +- Inspecting local application state that never leaves the browser. +- Calling into the host app (for example, a design tool or IDE) via its own APIs. + +Client tools let the model request that kind of data mid-turn: + +- On the server, you instruct your inference pipeline to stop when a specific tool is called (for example, by using `StopAtTools` around that tool). +- ChatKit turns the tool call into a streamed thread item. +- On the client, `onClientTool` receives that item, runs your callback, and returns a JSON result. +- ChatKit sends the result back to the server, which starts a new stream to continue the run with the tool output included as model input. + +Use client tools when the model needs fresh, local context it cannot safely obtain from server-side state alone. 
diff --git a/docs/concepts/widgets.md b/docs/concepts/widgets.md index 8740266..a80266d 100644 --- a/docs/concepts/widgets.md +++ b/docs/concepts/widgets.md @@ -31,7 +31,7 @@ Every widget must be wrapped in a root-level container element. For single, self ## .widget files -Exported `.widget` files are JSON blobs that include the widget template, the expected data schema, and supporting metadata. You can load them server-side and render widgets dynamically with `WidgetTemplate`; see [Build widgets with `WidgetTemplate`](../guides/stream-widgets.md#build-widgets-with-widgettemplate) for examples. +Exported `.widget` files are JSON blobs that include the widget template, the expected data schema, and supporting metadata. You can load them server-side and render widgets dynamically with `WidgetTemplate`; see [Build widgets with `WidgetTemplate`](../guides/build-interactive-responses-with-widgets.md#build-widgets-with-widgettemplate) for examples. ## WidgetItem @@ -51,6 +51,4 @@ The [`entities.onRequestPreview`](https://openai.github.io/chatkit-js/api/openai ## Related guides -- [Stream widgets](../guides/stream-widgets.md) -- [Create custom forms](../guides/create-custom-forms.md) -- [Handle widget actions](../guides/handle-widget-actions.md) +- [Build interactive responses with widgets](../guides/build-interactive-responses-with-widgets.md) diff --git a/docs/guides/accept-attachments.md b/docs/guides/accept-attachments.md deleted file mode 100644 index f29776a..0000000 --- a/docs/guides/accept-attachments.md +++ /dev/null @@ -1,149 +0,0 @@ -# Accept attachments - -Let users attach files/images by turning on client support, choosing an upload strategy, wiring the upload endpoints, and converting attachments to model inputs. - -## Enable attachments in the client - -Enable attachments in the composer and configure client-side limits: - -- Set `ChatKitOptions.composer.attachments.enabled = true` so the composer accepts file attachments. 
-- In the same `composer.attachments` block, configure accepted MIME types,per-message maximum attachment count, and per-file size limits: see [docs](https://openai.github.io/chatkit-js/api/openai/chatkit/type-aliases/composeroption/#attachments). - -## Configure an upload strategy - -Set [`ChatKitOptions.api.uploadStrategy`](https://openai.github.io/chatkit-js/api/openai/chatkit/type-aliases/fileuploadstrategy/) to: - -- **Direct**: your backend exposes a single upload URL that accepts the bytes and writes attachment metadata to your `Store`. Simpler and faster when you control uploads directly from the app server. -- **Two-phase**: the client makes a ChatKit API request to create an attachment metadata record (which forwards the request to `AttachmentStore`), you return an `upload_url` as part of the created attachment metadata, and the client uploads bytes in a second step. Prefer this when you front object storage with presigned/temporary URLs or want to offload upload bandwidth (e.g. to a third-party blob storage). - -Both strategies still require an `AttachmentStore` for delete cleanup. Choose direct for simplicity on the same origin; choose two-phase for cloud storage and larger files. - -## Enforce attachment access control - -Neither attachment metadata nor file bytes are protected by ChatKit. Use the `context` passed into your `AttachmentStore` methods to authorize every create/read/delete. Only return IDs, bytes, or signed URLs when the caller owns the attachment, and prefer short-lived download URLs. Skipping these checks can leak customer data. - -## If you chose direct upload - -Add the upload endpoint referenced in `uploadStrategy`. It must: - -- accept `multipart/form-data` with a `file` field, -- store the bytes wherever you like, -- create `Attachment` metadata, persist it via `Store.save_attachment`, and -- return the `Attachment` JSON. 
- -Implement `AttachmentStore.delete_attachment` to delete the stored bytes; `ChatKitServer` will then call `Store.delete_attachment` to drop metadata. - -Example client configuration: - -```js -{ - type: "direct", - uploadUrl: "/files", -} -``` - -Example FastAPI direct upload endpoint: - -```python -@app.post("/files") -async def upload_file(request: Request): - form_data = await request.form() - file = form_data.get("file") - - # Your blob store upload - attachment = await upload_to_blob_store(file) - - return Response(content=attachment.model_dump_json(), media_type="application/json") -``` - -## If you chose two-phase upload - -Implement `AttachmentStore.create_attachment` to: - -- build an `upload_url` that accepts `multipart/form-data` with a `file` field (direct PUTs are currently not supported), -- build the `Attachment` model, -- persist it via `Store.save_attachment`, and -- return it. - -Implement `AttachmentStore.delete_attachment` to delete the stored bytes; `ChatKitServer` will call `Store.delete_attachment` afterward. - -- The client POSTs the bytes to `upload_url` after it receives the created attachment metadata in the response. 
- -Client configuration: - -```js -{ - type: "two_phase", -} -``` - -Example two-phase store issuing a multipart upload URL: - -```python -attachment_store = BlobAttachmentStore() -server = MyChatKitServer(store=data_store, attachment_store=attachment_store) - -class BlobAttachmentStore(AttachmentStore[RequestContext]): - def generate_attachment_id(self, mime_type: str, context: RequestContext) -> str: - return f"att_{uuid4().hex}" - - async def create_attachment( - self, input: AttachmentCreateParams, context: RequestContext - ) -> Attachment: - att_id = self.generate_attachment_id(input.mime_type, context) - upload_url = issue_multipart_upload_url(att_id, input.mime_type) # your blob store - attachment = Attachment( - id=att_id, - mime_type=input.mime_type, - name=input.name, - upload_url=upload_url, - ) - await data_store.save_attachment(attachment, context=context) - return attachment - - async def delete_attachment(self, attachment_id: str, context: RequestContext) -> None: - await delete_blob(att_id=attachment_id) # your blob store -``` - -## Convert attachments to model input - -Attachments arrive on `input_user_message.attachments` in `ChatKitServer.respond`. The default `ThreadItemConverter` does not handle them, so subclass and implement `attachment_to_message_content` to return a `ResponseInputContentParam` before calling `Runner.run_streamed`. - -Example using a blob fetch helper: - -```python -from chatkit.agents import ThreadItemConverter -from chatkit.types import ImageAttachment -from openai.types.responses import ResponseInputFileParam, ResponseInputImageParam - -async def read_bytes(attachment_id: str) -> bytes: - ... 
# fetch from your blob store - -def as_data_url(mime: str, content: bytes) -> str: - return "data:" + mime + ";base64," + base64.b64encode(content).decode("utf-8") - -class MyConverter(ThreadItemConverter): - async def attachment_to_message_content(self, attachment): - content = await read_bytes(attachment.id) - if isinstance(attachment, ImageAttachment): - return ResponseInputImageParam( - type="input_image", - detail="auto", - image_url=as_data_url(attachment.mime_type, content), - ) - if attachment.mime_type == "application/pdf": - return ResponseInputFileParam( - type="input_file", - file_data=as_data_url(attachment.mime_type, content), - filename=attachment.name or "unknown", - ) - # For other text formats, check for API support first before - # sending as a ResponseInputFileParam. -``` - -## Show image attachment previews in thread - -Set `ImageAttachment.preview_url` to allow the client to render thumbnails. - -- If your preview URLs are **permanent/public**, set `preview_url` once when creating the attachment and persist it. -- If your storage uses **expiring URLs**, generate a fresh `preview_url` when returning attachment metadata (for example, in `Store.load_thread_items` and `Store.load_attachment`) rather than persisting a long-lived URL. In this case, returning a short-lived signed URL directly is the simplest approach. Alternatively, you may return a redirect that resolves to a temporary signed URL, as long as the final URL serves image bytes with appropriate CORS headers. diff --git a/docs/guides/accept-rich-user-input.md b/docs/guides/accept-rich-user-input.md new file mode 100644 index 0000000..052e7eb --- /dev/null +++ b/docs/guides/accept-rich-user-input.md @@ -0,0 +1,304 @@ +# Accept rich user input + +This guide explains how a ChatKit server accepts user input beyond plain text—such as attachments and @-mentions—and makes it available to your inference pipeline. 
+ +At a high level: + +- Attachments let users upload files that your model can read. +- @-mentions let users tag entities so the model does not have to guess from free text. + +## Attachments: let users upload files + +Let users attach files/images by turning on client support, choosing an upload strategy, wiring the upload endpoints, and converting attachments to model inputs. + +### Enable attachments in the client + +Turn on attachments in the composer and configure client-side limits: + +```ts +const chatkit = useChatKit({ + // ... + composer: { + attachments: { + enabled: true, + // configure accepted MIME types, count, and size limits here + }, + }, +}); +``` + +Under the hood this maps to `ChatKitOptions.composer.attachments`; see the [`composer.attachments` docs](https://openai.github.io/chatkit-js/api/openai/chatkit/type-aliases/composeroption/#attachments) for all available options. + +### Configure an upload strategy + +Set [`ChatKitOptions.api.uploadStrategy`](https://openai.github.io/chatkit-js/api/openai/chatkit/type-aliases/fileuploadstrategy/) to: + +- **Direct**: your backend exposes a single upload URL that accepts the bytes and writes attachment metadata to your `Store`. Simpler and faster when you control uploads directly from the app server. +- **Two-phase**: the client makes a ChatKit API request to create an attachment metadata record (which forwards the request to `AttachmentStore`), you return an `upload_url` as part of the created attachment metadata, and the client uploads bytes in a second step. Prefer this when you front object storage with presigned/temporary URLs or want to offload upload bandwidth (for example, to a third-party blob storage). + +Both strategies still require an `AttachmentStore` for delete cleanup. Choose direct for simplicity on the same origin; choose two-phase for cloud storage and larger files. + +### Enforce attachment access control + +Neither attachment metadata nor file bytes are protected by ChatKit. 
Use the `context` passed into your `AttachmentStore` methods to authorize every create/read/delete. Only return IDs, bytes, or signed URLs when the caller owns the attachment, and prefer short-lived download URLs. Skipping these checks can leak customer data. + +### Direct upload + +Add the upload endpoint referenced in `uploadStrategy`. It must: + +- accept `multipart/form-data` with a `file` field, +- store the bytes wherever you like, +- create `Attachment` metadata, persist it via `Store.save_attachment`, and +- return the `Attachment` JSON. + +Implement `AttachmentStore.delete_attachment` to delete the stored bytes; `ChatKitServer` will then call `Store.delete_attachment` to drop metadata. + +Example client configuration: + +```js +{ + type: "direct", + uploadUrl: "/files", +} +``` + +Example FastAPI direct upload endpoint: + +```python +@app.post("/files") +async def upload_file(request: Request): + form_data = await request.form() + file = form_data.get("file") + + # Your blob store upload + attachment = await upload_to_blob_store(file) + + return Response(content=attachment.model_dump_json(), media_type="application/json") +``` + +### Two-phase upload + +Implement `AttachmentStore.create_attachment` to: + +- build an `upload_url` that accepts `multipart/form-data` with a `file` field (direct PUTs are currently not supported), +- build the `Attachment` model, +- persist it via `Store.save_attachment`, and +- return it. + +Implement `AttachmentStore.delete_attachment` to delete the stored bytes; `ChatKitServer` will call `Store.delete_attachment` afterward. + +- The client POSTs the bytes to `upload_url` after it receives the created attachment metadata in the response. 
+
+Client configuration:
+
+```js
+{
+  type: "two_phase",
+}
+```
+
+Example two-phase store issuing a multipart upload URL:
+
+```python
+attachment_store = BlobAttachmentStore()
+server = MyChatKitServer(store=data_store, attachment_store=attachment_store)
+
+
+class BlobAttachmentStore(AttachmentStore[RequestContext]):
+    def generate_attachment_id(self, mime_type: str, context: RequestContext) -> str:
+        return f"att_{uuid4().hex}"
+
+    async def create_attachment(
+        self, input: AttachmentCreateParams, context: RequestContext
+    ) -> Attachment:
+        att_id = self.generate_attachment_id(input.mime_type, context)
+        upload_url = issue_multipart_upload_url(att_id, input.mime_type)  # your blob store
+        attachment = Attachment(
+            id=att_id,
+            mime_type=input.mime_type,
+            name=input.name,
+            upload_url=upload_url,
+        )
+        await data_store.save_attachment(attachment, context=context)
+        return attachment
+
+    async def delete_attachment(self, attachment_id: str, context: RequestContext) -> None:
+        await delete_blob(att_id=attachment_id)  # your blob store
+```
+
+### Convert attachments to model input
+
+Attachments arrive on `input_user_message.attachments` in `ChatKitServer.respond`. The default `ThreadItemConverter` does not handle them, so subclass and implement `attachment_to_message_content` to return a `ResponseInputContentParam` before calling `Runner.run_streamed`.
+
+Example using a blob fetch helper:
+
+```python
+from chatkit.agents import ThreadItemConverter
+from chatkit.types import ImageAttachment
+from openai.types.responses import ResponseInputFileParam, ResponseInputImageParam
+
+
+async def read_bytes(attachment_id: str) -> bytes:
+    ... 
# fetch from your blob store + + +def as_data_url(mime: str, content: bytes) -> str: + return "data:" + mime + ";base64," + base64.b64encode(content).decode("utf-8") + + +class MyConverter(ThreadItemConverter): + async def attachment_to_message_content(self, attachment): + content = await read_bytes(attachment.id) + if isinstance(attachment, ImageAttachment): + return ResponseInputImageParam( + type="input_image", + detail="auto", + image_url=as_data_url(attachment.mime_type, content), + ) + if attachment.mime_type == "application/pdf": + return ResponseInputFileParam( + type="input_file", + file_data=as_data_url(attachment.mime_type, content), + filename=attachment.name or "unknown", + ) + # For other text formats, check for API support first before + # sending as a ResponseInputFileParam. +``` + +### Show image attachment previews in thread + +Set `ImageAttachment.preview_url` to allow the client to render thumbnails. + +- If your preview URLs are **permanent/public**, set `preview_url` once when creating the attachment and persist it. +- If your storage uses **expiring URLs**, generate a fresh `preview_url` when returning attachment metadata (for example, in `Store.load_thread_items` and `Store.load_attachment`) rather than persisting a long-lived URL. In this case, returning a short-lived signed URL directly is the simplest approach. Alternatively, you may return a redirect that resolves to a temporary signed URL, as long as the final URL serves image bytes with appropriate CORS headers. + +## @-mentions: tag entities in user messages + +Enable @-mentions so users can tag entities (like documents, tickets, or users) instead of pasting raw identifiers. Mentions travel through ChatKit as structured tags so the model can resolve entities instead of guessing from free text. 
+
+### Enable as-you-type entity lookup in the composer
+
+To enable entity tagging as @-mentions in the composer, configure [`entities.onTagSearch`](https://openai.github.io/chatkit-js/api/openai/chatkit/type-aliases/entitiesoption/#ontagsearch) as a ChatKit.js option.
+
+It should return a list of [Entity](https://openai.github.io/chatkit-js/api/openai/chatkit/type-aliases/entity/) objects that match the query string.
+
+```ts
+const chatkit = useChatKit({
+  // ...
+  entities: {
+    onTagSearch: async (query: string) => {
+      return [
+        {
+          id: "article_123",
+          title: "The Future of AI",
+          group: "Trending",
+          icon: "globe",
+          data: { type: "article" }
+        },
+        {
+          id: "article_124",
+          title: "One weird trick to improve your sleep",
+          group: "Trending",
+          icon: "globe",
+          data: { type: "article" }
+        },
+      ]
+    },
+  },
+})
+```
+
+### Convert tags into model input in your server
+
+Mentions arrive server-side as structured tags. Override `ThreadItemConverter.tag_to_message_content` to describe what each tag refers to and translate it into model-readable content.
+
+Example converter method that wraps the tagged entity details in custom markup:
+
+```python
+from chatkit.agents import ThreadItemConverter
+from chatkit.types import UserMessageTagContent
+from openai.types.responses import ResponseInputTextParam
+
+
+class MyThreadItemConverter(ThreadItemConverter):
+    async def tag_to_message_content(
+        self, tag: UserMessageTagContent
+    ) -> ResponseInputTextParam:
+        if tag.type == "article":
+            # Load or unpack the entity the tag refers to
+            summary = await fetch_article_summary(tag.id)
+            return ResponseInputTextParam(
+                type="input_text",
+                text=(
+                    "<ARTICLE_TAG>\n"
+                    f"ID: {tag.id}\n"
+                    f"Title: {tag.text}\n"
+                    f"Summary: {summary}\n"
+                    "</ARTICLE_TAG>"
+                ),
+            )
+```
+
+### Pair mentions with retrieval tool calls
+
+When the referenced content is too large to inline, keep the tag lean (id + short summary) and let the model fetch details via a tool. 
In your system prompt, tell the assistant to call the retrieval tool when it sees an `ARTICLE_TAG`.
+
+Example tool paired with the converter above:
+
+```python
+from agents import Agent, StopAtTools, RunContextWrapper, function_tool
+from chatkit.agents import AgentContext
+
+
+@function_tool(description_override="Fetch full article content by id.")
+async def fetch_article(ctx: RunContextWrapper[AgentContext], article_id: str):
+    article = await load_article_content(article_id)
+    return {
+        "title": article.title,
+        "content": article.body,
+        "url": article.url,
+    }
+
+
+assistant = Agent[AgentContext](
+    ...,
+    tools=[fetch_article],
+)
+```
+
+In `tag_to_message_content`, include the id the tool expects (for example, `tag.id` or `tag.data["article_id"]`). The model can then decide to call `fetch_article` to pull the full text instead of relying solely on the brief summary in the tag.
+
+### Prompt the model about mentions
+
+Add short system guidance to help the assistant interpret the input item that carries the @-mention details.
+
+For example:
+
+```
+- <ARTICLE_TAG>...</ARTICLE_TAG> is a summary of an article the user referenced.
+- Use it as trusted context when answering questions about that article.
+- Do not restate the summary verbatim; answer the user’s question concisely.
+- Call the `fetch_article` tool with the article id from the tag when more
+  detail is needed or the user asks for specifics not in the summary.
+```
+
+Combined with the converter above, the model receives explicit, disambiguated entity context while users keep a rich mention UI.
+
+### Handle clicks and previews
+
+Clicks and hover previews apply to the tagged entities shown in past user messages. 
Mark an entity as interactive when you return it from `onTagSearch` so the client knows to wire these callbacks: + +```ts +{ + id: "article_123", + title: "The Future of AI", + group: "Trending", + icon: "globe", + interactive: true, // clickable/previewable + data: { type: "article" } +} +``` + +- `entities.onClick` fires when a user clicks a tag in the transcript. Handle navigation or open a detail view. See the [onClick option](https://openai.github.io/chatkit-js/api/openai/chatkit/type-aliases/entitiesoption/#onclick). +- `entities.onRequestPreview` runs when the user hovers or taps a tag that has `interactive: true`. Return a `BasicRoot` widget; you can build one with `WidgetTemplate.build_basic(...)` if you are building the preview widgets server-side. See the [onRequestPreview option](https://openai.github.io/chatkit-js/api/openai/chatkit/type-aliases/entitiesoption/#onrequestpreview). + diff --git a/docs/guides/allow-mentions.md b/docs/guides/allow-mentions.md deleted file mode 100644 index eeb4685..0000000 --- a/docs/guides/allow-mentions.md +++ /dev/null @@ -1,129 +0,0 @@ -# Allow @-mentions in user messages - -Mentions travel through ChatKit as structured tags so the model can resolve entities instead of guessing from free text. Send `input_tag` parts from the client and translate them into model-readable context on the server. - -## Enable as-you-type entity lookup in the composer - -To enable entity tagging as @-mentions in the composer, configure [`entities.onTagSearch`](https://openai.github.io/chatkit-js/api/openai/chatkit/type-aliases/entitiesoption/#ontagsearch) as a ChatKit.js option. - -It should return a list of [Entity](https://openai.github.io/chatkit-js/api/openai/chatkit/type-aliases/entity/) objects that match the query string. - - -```ts -const chatkit = useChatKit({ - // ... 
- entities: { - onTagSearch: async (query: string) => { - return [ - { - id: "article_123", - title: "The Future of AI", - group: "Trending", - icon: "globe", - data: { type: "article" } - }, - { - id: "article_124", - title: "One weird trick to improve your sleep", - group: "Trending", - icon: "globe", - data: { type: "article" } - }, - ] - }, - }, -}) -``` - -## Convert tags into model input in your server - -Override `ThreadItemConverter.tag_to_message_content` to describe what each tag refers to. - -Example converter method that wraps the tagged entity details in custom markup: - -```python -from chatkit.agents import ThreadItemConverter -from chatkit.types import UserMessageTagContent -from openai.types.responses import ResponseInputTextParam - -class MyThreadItemConverter(ThreadItemConverter): - async def tag_to_message_content( - self, tag: UserMessageTagContent - ) -> ResponseInputTextParam: - if tag.type == "article": - # Load or unpack the entity the tag refers to - summary = await fetch_article_summary(tag.id) - return ResponseInputTextParam( - type="input_text", - text=( - "\n" - f"ID: {tag.id}\n" - f"Title: {tag.text}\n" - f"Summary: {summary}\n" - "" - ), - ) -``` - - -## Pair mentions with retrieval tool calls - -When the referenced content is too large to inline, keep the tag lean (id + short summary) and let the model fetch details via a tool. In your system prompt, tell the assistant to call the retrieval tool when it sees an `ARTICLE_TAG`. 
- -Example tool paired with the converter above: - -```python -from agents import Agent, StopAtTools, RunContextWrapper, function_tool -from chatkit.agents import AgentContext - -@function_tool(description_override="Fetch full article content by id.") -async def fetch_article(ctx: RunContextWrapper[AgentContext], article_id: str): - article = await load_article_content(article_id) - return { - "title": article.title, - "content": article.body, - "url": article.url, - } - -assistant = Agent[AgentContext]( - ..., - tools=[fetch_article], -) -``` - -In `tag_to_message_content`, include the id the tool expects (for example, `tag.id` or `tag.data["article_id"]`). The model can then decide to call `fetch_article` to pull the full text instead of relying solely on the brief summary in the tag. - -## Prompt the model about mentions - -Add short system guidance to help the assistant understand the input item that adds details about the @-mention. - -For example: - -``` -- ... is a summary of an article the user referenced. -- Use it as trusted context when answering questions about that article. -- Do not restate the summary verbatim; answer the user’s question concisely. -- Call the `fetch_article` tool with the article id from the tag when more - detail is needed or the user asks for specifics not in the summary. -``` - -Combined with the converter above, the model receives explicit, disambiguated entity context while users keep a rich mention UI. - - -## Handle clicks and previews - -Clicks and hover previews apply to the tagged entities shown in past user messages. Mark an entity as interactive when you return it from `onTagSearch` so the client knows to wire these callbacks: - -```ts -{ - id: "article_123", - title: "The Future of AI", - group: "Trending", - icon: "globe", - interactive: true, // clickable/previewable - data: { type: "article" } -} -``` - -- `entities.onClick` fires when a user clicks a tag in the transcript. Handle navigation or open a detail view. 
See the [onClick option](https://openai.github.io/chatkit-js/api/openai/chatkit/type-aliases/entitiesoption/#onclick). -- `entities.onRequestPreview` runs when the user hovers or taps a tag that has `interactive: true`. Return a `BasicRoot` widget; you can build one with `WidgetTemplate.build_basic(...)` if you are building the preview widgets server-side. See the [onRequestPreview option](https://openai.github.io/chatkit-js/api/openai/chatkit/type-aliases/entitiesoption/#onrequestpreview). diff --git a/docs/guides/build-interactive-responses-with-widgets.md b/docs/guides/build-interactive-responses-with-widgets.md new file mode 100644 index 0000000..e04dbd8 --- /dev/null +++ b/docs/guides/build-interactive-responses-with-widgets.md @@ -0,0 +1,328 @@ +# Build interactive responses with widgets + +Use widgets to turn assistant responses into rich, interactive UIs. Design widgets visually, hydrate them with data on the server, stream them into the conversation, and wire actions and forms so users can click, edit, and submit without writing long free-text prompts. + +This guide covers: + +- Designing and loading widget templates +- Streaming widgets from `respond` and from tools +- Handling widget actions on the server and client +- Building editable forms with widgets + +## Design widgets in ChatKit Studio + +Use to visually design cards, lists, forms, charts, and other widget components. Populate the **Data** panel with sample values to preview how the widget renders with real inputs. + +When the layout and bindings look correct, click **Export** to download the generated `.widget` file. Commit this file alongside the server code that builds and renders the widget. + +## Build widgets with `WidgetTemplate` + +Load the `.widget` file with `WidgetTemplate.from_file` and hydrate it with runtime data. Placeholders inside the `.widget` template (Jinja-style `{{ }}` expressions) are rendered before the widget is streamed. 
+ +```python +from chatkit.widgets import WidgetTemplate + +message_template = WidgetTemplate.from_file("widgets/channel_message.widget") + + +def build_message_widget(user_name: str, message: str): + # Replace this helper with whatever your integration uses to build widgets. + return message_template.build( + { + "user_name": user_name, + "message": message, + } + ) +``` + +`WidgetTemplate.build` accepts plain dicts or Pydantic models. Use `.build_basic` if you're working with a `BasicRoot` widget outside of streaming. + +## Stream widgets from `respond` + +Use `stream_widget` to emit a one-off widget or stream updates from an async generator. + +```python +from chatkit.server import stream_widget + + +async def respond(...): + user_name = "Harry Potter" + message = "Yer a wizard, Harry" + message_widget = build_message_widget(user_name=user_name, message=message) + async for event in stream_widget( + thread, + message_widget, + copy_text=f"Message to {user_name}: {message}", + generate_id=lambda item_type: self.store.generate_item_id( + item_type, thread, context + ), + ): + yield event +``` + +To stream gradual updates, yield successive widget states from an async generator; `stream_widget` diffs and emits `ThreadItemUpdatedEvent`s for you. + +## Stream widgets from tools + +Tools can enqueue widgets via `AgentContext.stream_widget`; `stream_agent_response` forwards them to the client. + +```python +from agents import RunContextWrapper, function_tool +from chatkit.agents import AgentContext + + +@function_tool(description_override="Display a sample widget to the user.") +async def sample_widget(ctx: RunContextWrapper[AgentContext]): + message_widget = build_message_widget(...) + await ctx.context.stream_widget(message_widget) +``` + +## Stream widget updates while text streams + +The examples above return a fully completed static widget. You can also stream an updating widget by yielding new versions of the widget from a generator function. 
The ChatKit framework will send updates for the parts of the widget that have changed. + +!!! note "Text streaming support" + Currently, only `` and `` components marked with an `id` have their text updates streamed. Other diffs will forgo the streaming UI and replace and rerender parts of the widget client-side. + +```python +from typing import AsyncGenerator + +from agents import RunContextWrapper, function_tool +from chatkit.agents import AgentContext, Runner +from chatkit.widgets import WidgetRoot + + +@function_tool +async def draft_message_to_harry(ctx: RunContextWrapper[AgentContext]): + # message_generator is your model/tool function that streams text + message_result = Runner.run_streamed( + message_generator, "Draft a message to Harry." + ) + + async def widget_generator() -> AsyncGenerator[WidgetRoot, None]: + message = "" + async for event in message_result.stream_events(): + if ( + event.type == "raw_response_event" + and event.data.type == "response.output_text.delta" + ): + message += event.data.delta + yield build_message_widget( + user_name="Harry Potter", + message=message, + ) + + # Final render after streaming completes. + yield build_message_widget( + user_name="Harry Potter", + message=message, + ) + + await ctx.context.stream_widget(widget_generator()) +``` + +The inner generator collects the streamed text events and rebuilds the widget with the latest message so the UI updates incrementally. + +## Handle widget actions + +Actions let widget interactions trigger server or client logic without posting a chat message. + +### Define actions in your widget + +Configure actions as part of the widget definition while you design it in . Add an action to any action-capable component such as `Button.onClickAction`; explore supported components on the components page. + +```jsx +