From b08f11f5bf7b09798367f5ee68da26436201322c Mon Sep 17 00:00:00 2001 From: zak Date: Thu, 11 Dec 2025 11:52:27 +0000 Subject: [PATCH 01/13] ai-transport: add message per response doc Add doc explaining streaming tokens with appendMessage and update compaction allowing message-per-response history. --- src/data/nav/aitransport.ts | 9 + .../token-streaming/message-per-response.mdx | 495 ++++++++++++++++++ 2 files changed, 504 insertions(+) create mode 100644 src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx diff --git a/src/data/nav/aitransport.ts b/src/data/nav/aitransport.ts index a0cea2f5cc..4b892d74ac 100644 --- a/src/data/nav/aitransport.ts +++ b/src/data/nav/aitransport.ts @@ -16,6 +16,15 @@ export default { link: '/docs/ai-transport', index: true, }, + { + name: 'Token streaming', + pages: [ + { + name: 'Message per response', + link: '/docs/ai-transport/features/token-streaming/message-per-response', + }, + ], + }, ], }, { diff --git a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx new file mode 100644 index 0000000000..41a21ba555 --- /dev/null +++ b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx @@ -0,0 +1,495 @@ +--- +title: Message per response +meta_description: "Stream individual tokens from AI models into a single message over Ably." +--- + +Stream LLM and generative AI responses efficiently by appending individual tokens to a single +message on an Ably channel. This pattern creates one complete response message in channel history +while delivering tokens in realtime. + +## Overview + +The message-per-response pattern enables you to stream AI-generated content as individual tokens in +realtime, while maintaining a clean, compacted message history. Each AI response becomes a single +message that grows as tokens are appended, resulting in efficient storage and easy retrieval of +complete responses. + +### When to use this pattern + +This approach is ideal when: + +- You want each complete AI response stored as a single message in history. +- You want clients joining mid-stream to catch up efficiently without processing thousands of + individual tokens. +- Your application displays progressive AI responses that build up over time. + +### How it works + +1. **Initial message**: When an AI response begins, publish an initial message with `message.create` + action to the Ably channel with an empty or the first token as content. +2. **Token streaming**: Append subsequent tokens to the original message by publishing those tokens + with the `message.append` action. +3. **Live Delivery**: Clients subscribed to the channel receive each appended token in real-time, allowing + them to progressively render the response. +4. **Compacted history**: The channel history contains only one message per AI response, + which includes all tokens appended to it concatenated together. + +You do not need to mark the message or token stream as completed; the final message will +automatically have the full response with all tokens appended to it. + +## Setup + +Message append functionality requires the "Message annotations, updates, and deletes" [channel rule](/docs/channels#rules) enabled for your channel or [namespace](/docs/channels#namespaces). This rule automatically enables message persistence. + +To enable the channel rule: + +1. Go to the [Ably dashboard](https://www.ably.com/dashboard) and select your app. +2. 
Navigate to the "Configuration" > "Rules" section from the left-hand navigation bar. +3. Choose "Add new rule". +4. Enter a channel name or namespace pattern (e.g. `ai:*` for all channels starting with `ai:`). +5. Select the "Message annotations, updates, and deletes" rule from the list. +6. Click "Create channel rule". + +The examples in this guide use the `ai:` namespace prefix, which assumes you have configured the rule for `ai:*`. + +### Message size limits + +Standard Ably message [size limits](/docs/platform/pricing/limits#message) apply to the complete concatenated message. The system validates size limits before accepting append operations. If appending a token would exceed the maximum message size, the append is rejected. + +## Publishing tokens + +You should publish tokens from a [Realtime](/docs/api/realtime-sdk) client, which maintains a +persistent connection to the Ably service. This allows you to publish at very high message rates +with the lowest possible latencies, while preserving guarantees around message delivery order. +For more information, see [Realtime and REST](/docs/basics#realtime-and-rest). + +[Channels](/docs/channels) are used to separate message traffic into different topics. +For token streaming, each conversation or session typically has its own channel. + +Use the [`get()`](/docs/api/realtime-sdk/channels#get) method to create or retrieve a channel instance: + + +```javascript +const channel = realtime.channels.get('ai:{{RANDOM_CHANNEL_NAME}}'); +``` + + +To start streaming an AI response, publish the initial message. Then append each subsequent token +to that message as it arrives from the AI model: + + +```javascript +// Example: stream yields string tokens like 'Hello', ' world', '!' + +// Publish initial message and capture the serial for appending tokens +const { serials: [msgSerial] } = await channel.publish('response', { data: '' }); + +for await (const token of stream) { + // Append each token as it arrives + channel.appendMessage(msgSerial, token); +} +``` + + +When publishing tokens, don't await the `channel.appendMessage()` call. Ably rolls up acknowledgments +and debounces them for efficiency, which means awaiting each append would unnecessarily slow down +your token stream. Messages are still published in the order that `appendMessage()` is called, so delivery +order is not affected. + +Append only supports concatenating data of the same type as the original message. For example, if +the initial message data is a string, all appended tokens must also be strings. If the initial +message data is binary, all appended tokens must be binary. + +This pattern allows publishing append operations for multiple concurrent model responses on the same +channel. As long as you append to the correct message serial, tokens from different responses will +not interfere with each other, and the final concatenated message for each response will contain only the tokens +from that response. + +### Complete publish example + +The following example shows how to stream an AI response, publishing the first token as the initial message and appending subsequent tokens: + + +```javascript +const realtime = new Ably.Realtime('{{API_KEY}}'); +const channel = realtime.channels.get('ai:responses'); + +async function streamAIResponse(prompt) { + // Example: stream yields string tokens like 'Hello', ' world', '!' 
+ const stream = await getAIModelStream(prompt); + + let messageSerial; + + for await (const token of stream) { + if (!messageSerial) { + // First token: create the message and get serial + const response = await channel.publish('ai-response', token); + messageSerial = response.serials[0]; + } else { + // Subsequent tokens: append without awaiting + channel.appendMessage(messageSerial, token); + } + } +} +``` + + +## Subscribing to token streams + +Subscribers receive different message actions depending on when they join and how they're retrieving +messages. + +When subscribed to a channel, clients receive the initial message with the `message.create` action, +followed by each token as a `message.append` action in real-time. + + +```javascript +const channel = realtime.channels.get('ai:responses'); + +// Track responses by message serial +const responses = new Map(); + +await channel.subscribe((msg) => { + switch (msg.action) { + case 'message.create': + // New response started + responses.set(msg.serial, msg.data); + break; + case 'message.append': + // Append token to existing response + const current = responses.get(msg.serial) || ''; + responses.set(msg.serial, current + msg.data); + break; + case 'message.update': + // Replace entire response content + responses.set(msg.serial, msg.data); + break; + } +}); +``` + + +Each `message.append` event contains only the new token fragment in `msg.data`, not the full +concatenated response. + +Occasionally you may receive a `message.update` action, which indicates that the channel needs to stream the entire message data so far. For example, this can happen if the client [resumes](/docs/connect/states#resume) after a transient disconnection and the channel needs to resynchronize the full message state. In this case, `msg.data` contains the complete response up to that point. For `message.update` events, you should replace the entire response content. + +## Client hydration + +Clients joining a channel or recovering from disconnection can efficiently catchup using rewind or +history. For temporary disconnections, Ably's automatic [connection recovery](docs/connect/states#connection-state-recovery) +ensures that clients receive all missed tokens in order. + +By using either rewind or history with `untilAttach`, clients can efficiently hydrate the existing +response state without needing to process every individual token. Both rewind and history deliver +concatenated responses as `message.update` events and seamlessly transition from historical +responses to live `message.append` events. + +### Using rewind + +[Rewind](/docs/channels/options/rewind) attaches to a channel starting from a point in the past, delivering complete concatenated +messages as `message.update` events. 
+ + + +```javascript +// Use rewind to receive recent historical messages +const channel = realtime.channels.get('ai:{{RANDOM_CHANNEL_NAME}}', { + params: { rewind: '2m' } // or rewind: '10' for message count +}); + +// Track responses by message serial +const responses = new Map(); + +await channel.subscribe((msg) => { + switch (msg.action) { + case 'message.create': + // New response started + responses.set(msg.serial, msg.data); + break; + case 'message.append': + // Append token to existing response + const current = responses.get(msg.serial) || ''; + responses.set(msg.serial, current + msg.data); + break; + case 'message.update': + // Replace entire response content + responses.set(msg.serial, msg.data); + break; + } +}); +``` + + +### Using history with untilAttach + +The `untilAttach` option provides [continuous history](/docs/storage-history/history#continuous-history) +from the point of attachment backward: + + +```javascript +const channel = realtime.channels.get('ai:responses'); + +const responses = new Map(); + +// Subscribe to live messages (implicitly attaches the channel) +await channel.subscribe((msg) => { + switch (msg.action) { + case 'message.create': + responses.set(msg.id, msg.data); + break; + case 'message.append': + const current = responses.get(msg.id) || ''; + responses.set(msg.id, current + msg.data); + break; + case 'message.update': + responses.set(msg.id, msg.data); + break; + } +}); + +// Fetch history up until the point of attachment +let page = await channel.history({ untilAttach: true }); + +// Paginate backwards through history +while (page) { + // Messages are newest-first + for (const message of page.items) { + // message.data contains the full concatenated text + responses.set(message.id, message.data); + } + + // Move to next page if available + page = page.hasNext() ? await page.next() : null; +} +``` + + +### Hydrating an in-progress response + +A common pattern is to persist completed responses in your database while using Ably for streaming in-progress responses. When clients reconnect, they load completed responses from your database first, then use Ably to catch up on any response that was still in progress. 
+ +#### Hydrate using rewind + +Load completed responses from your database, then use rewind to catch up on any in-progress response, skipping messages for responses already loaded: + + +```javascript +// Load completed responses from your database +const completedResponses = await loadResponsesFromDatabase(); + +const channel = realtime.channels.get('ai:responses', { + params: { rewind: '2m' } +}); + +await channel.subscribe((msg) => { + const responseId = msg.extras?.headers?.responseId; + + // Skip messages for responses already loaded from database + if (completedResponses.has(responseId)) { + return; + } + + switch (msg.action) { + case 'message.create': + displayNewResponse(msg.data, responseId); + break; + case 'message.append': + appendToResponse(msg.data, responseId); + break; + case 'message.update': + replaceResponse(msg.data, responseId); + break; + } +}); +``` + + +#### Hydrate using history + +Load completed responses from your database, then use history to catch up on any in-progress response: + + +```javascript +// Load completed responses from your database +const completedResponses = await loadResponsesFromDatabase(); + +const channel = realtime.channels.get('ai:responses'); + +// Subscribe to live messages (implicitly attaches) +await channel.subscribe((msg) => { + const responseId = msg.extras?.headers?.responseId; + + // Skip messages for responses already loaded from database + if (completedResponses.has(responseId)) { + return; + } + + switch (msg.action) { + case 'message.create': + displayNewResponse(msg.data, responseId); + break; + case 'message.append': + appendToResponse(msg.data, responseId); + break; + case 'message.update': + replaceResponse(msg.data, responseId); + break; + } +}); + +// Fetch history for any in-progress response +const historyPage = await channel.history({ untilAttach: true }); + +for (const msg of historyPage.items) { + const responseId = msg.extras?.headers?.responseId; + + // Skip responses already loaded from database + if (completedResponses.has(responseId)) { + continue; + } + + // msg.data contains the full concatenated text so far + displayFullResponse(msg.data, responseId); +} +``` + + +## Headers and metadata + +Use the `extras.headers` field to attach metadata to your messages. Headers are useful for correlating Ably messages with external systems, such as your database IDs or AI model request identifiers. + +### Header superseding behavior + +When you include headers in an append operation, they completely replace all previous headers on the message. This "last write wins" behavior means you must include all headers you want to retain with each append that specifies headers. 
+ + +```javascript +// Initial message with headers +const response = await channel.publish({ + name: 'ai-response', + data: 'Hello', + extras: { + headers: { + responseId: 'resp_123', + model: 'gpt-4' + } + } +}); + +// Append without headers - previous headers are retained +channel.appendMessage(response.serials[0], ' world'); +// Message headers: { responseId: 'resp_123', model: 'gpt-4' } + +// Append with headers - completely replaces previous headers +channel.appendMessage(response.serials[0], '!', { + extras: { + headers: { + responseId: 'resp_123', + model: 'gpt-4', + tokensUsed: '15' + } + } +}); +// Message headers: { responseId: 'resp_123', model: 'gpt-4', tokensUsed: '15' } +``` + + +A common pattern is to include static metadata in the initial message, then add completion metadata with the final append: + + +```javascript +async function streamWithMetadata(prompt) { + const stream = await getAIModelStream(prompt); + let messageSerial; + let tokenCount = 0; + + for await (const token of stream) { + tokenCount++; + if (!messageSerial) { + // First token: include static metadata + const response = await channel.publish({ + name: 'ai-response', + data: token, + extras: { + headers: { + responseId: prompt.responseId, + model: prompt.model + } + } + }); + messageSerial = response.serials[0]; + } else { + // Subsequent tokens: append without headers + channel.appendMessage(messageSerial, token); + } + } + + // Final append: include completion metadata + channel.appendMessage(messageSerial, '', { + extras: { + headers: { + responseId: prompt.responseId, + model: prompt.model, + tokensUsed: String(tokenCount), + completedAt: new Date().toISOString() + } + } + }); +} +``` + + +### Metadata best practices + +Do not include metadata in the body of an append request. Instead, use the `extras.headers` field to +keep metadata separate from the message content. This ensures that clients can easily process the +concatenated response without needing to parse out metadata. + + +```javascript +// ✓ GOOD: Metadata in headers +const response = await channel.publish({ + data: 'The response text', // Pure concatenated text + extras: { + headers: { + model: 'gpt-4', + } + } +}); + +// ✗ BAD: Mixing metadata with content +const response = await channel.publish({ + data: JSON.stringify({ // Don't do this + text: 'The response text', + model: 'gpt-4', + }) +}); +``` + + +By including metadata in the body of the message, the final concatenated response would contain all +the metadata from each append, making it difficult to extract the pure response text. + +For example, if you appended tokens with metadata in the body, the final message data would look +like this: + +```json +{ + "text": "Hello", + "model": "gpt-4", +}{ + "text": " world", + "model": "gpt-4", +}{ + "text": "!", + "model": "gpt-4", +} +``` + +If you use headers for metadata, and the body only contains the response text, the final message +data would be simply: + +```text +Hello world! 
+``` From 23ae35ac74ec2f5f298ee32e2753acd7171ce2d0 Mon Sep 17 00:00:00 2001 From: zak Date: Thu, 11 Dec 2025 17:40:08 +0000 Subject: [PATCH 02/13] fix nav and typos --- src/data/nav/aitransport.ts | 14 +++++++------- .../token-streaming/message-per-response.mdx | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/data/nav/aitransport.ts b/src/data/nav/aitransport.ts index 4b892d74ac..97d5167ce4 100644 --- a/src/data/nav/aitransport.ts +++ b/src/data/nav/aitransport.ts @@ -16,14 +16,14 @@ export default { link: '/docs/ai-transport', index: true, }, + ], + }, + { + name: 'Token streaming', + pages: [ { - name: 'Token streaming', - pages: [ - { - name: 'Message per response', - link: '/docs/ai-transport/features/token-streaming/message-per-response', - }, - ], + name: 'Message per response', + link: '/docs/ai-transport/features/token-streaming/message-per-response', }, ], }, diff --git a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx index 41a21ba555..4cfa42f7c2 100644 --- a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx +++ b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx @@ -29,7 +29,7 @@ This approach is ideal when: action to the Ably channel with an empty or the first token as content. 2. **Token streaming**: Append subsequent tokens to the original message by publishing those tokens with the `message.append` action. -3. **Live Delivery**: Clients subscribed to the channel receive each appended token in real-time, allowing +3. **Live Delivery**: Clients subscribed to the channel receive each appended token in realtime, allowing them to progressively render the response. 4. **Compacted history**: The channel history contains only one message per AI response, which includes all tokens appended to it concatenated together. @@ -58,12 +58,12 @@ Standard Ably message [size limits](/docs/platform/pricing/limits#message) apply ## Publishing tokens -You should publish tokens from a [Realtime](/docs/api/realtime-sdk) client, which maintains a +Publish tokens from a [Realtime](/docs/api/realtime-sdk) client, which maintains a persistent connection to the Ably service. This allows you to publish at very high message rates with the lowest possible latencies, while preserving guarantees around message delivery order. For more information, see [Realtime and REST](/docs/basics#realtime-and-rest). -[Channels](/docs/channels) are used to separate message traffic into different topics. +[Channels](/docs/channels) separate message traffic into different topics. For token streaming, each conversation or session typically has its own channel. Use the [`get()`](/docs/api/realtime-sdk/channels#get) method to create or retrieve a channel instance: @@ -140,7 +140,7 @@ Subscribers receive different message actions depending on when they join and ho messages. When subscribed to a channel, clients receive the initial message with the `message.create` action, -followed by each token as a `message.append` action in real-time. +followed by each token as a `message.append` action in realtime. ```javascript From 5f2b13e11070bc558e3f223a2695b3d1414585c2 Mon Sep 17 00:00:00 2001 From: Mike Christensen Date: Tue, 16 Dec 2025 20:33:11 +0000 Subject: [PATCH 03/13] ai-transport/token-streaming: unify nav Unifies the token streaming nav for token streaming after rebase. 
--- src/data/nav/aitransport.ts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/data/nav/aitransport.ts b/src/data/nav/aitransport.ts index 97d5167ce4..dd82007afa 100644 --- a/src/data/nav/aitransport.ts +++ b/src/data/nav/aitransport.ts @@ -25,11 +25,6 @@ export default { name: 'Message per response', link: '/docs/ai-transport/features/token-streaming/message-per-response', }, - ], - }, - { - name: 'Token streaming', - pages: [ { name: 'Message per token', link: '/docs/ai-transport/features/token-streaming/message-per-token', From b915ac770dcff3b650663ce8b53b8ed2ec3f57ba Mon Sep 17 00:00:00 2001 From: Mike Christensen Date: Tue, 16 Dec 2025 20:49:02 +0000 Subject: [PATCH 04/13] ai-transport/token-streaming: refine intro Refines the intro copy in message-per-response to have structural similarity with the message-per-token page. --- .../token-streaming/message-per-response.mdx | 51 +++++++------------ 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx index 4cfa42f7c2..8d5cd2ffbb 100644 --- a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx +++ b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx @@ -3,43 +3,30 @@ title: Message per response meta_description: "Stream individual tokens from AI models into a single message over Ably." --- -Stream LLM and generative AI responses efficiently by appending individual tokens to a single -message on an Ably channel. This pattern creates one complete response message in channel history -while delivering tokens in realtime. +Token streaming with message-per-response is a pattern where every token generated by your model is appended to a single Ably message. Each complete AI response then appears as one message in the channel history while delivering live tokens in realtime. This uses [Ably Pub/Sub](/docs/basics) for realtime communication between agents and clients. -## Overview +This pattern is useful for chat-style applications where you want each complete AI response stored as a single message in history, making it easy to retrieve and display multi-response conversation history. Each agent response becomes a single message that grows as tokens are appended, allowing clients joining mid-stream to catch up efficiently without processing thousands of individual tokens. -The message-per-response pattern enables you to stream AI-generated content as individual tokens in -realtime, while maintaining a clean, compacted message history. Each AI response becomes a single -message that grows as tokens are appended, resulting in efficient storage and easy retrieval of -complete responses. +## How it works -### When to use this pattern +1. **Initial message**: When an agent response begins, publish an initial message with `message.create` action to the Ably channel with an empty or the first token as content. +2. **Token streaming**: Append subsequent tokens to the original message by publishing those tokens with the `message.append` action. +3. **Live delivery**: Clients subscribed to the channel receive each appended token in realtime, allowing them to progressively render the response. +4. **Compacted history**: The channel history contains only one message per agent response, which includes all tokens appended to it concatenated together. 
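+
+A minimal sketch of this flow, assuming an existing Realtime `channel` with the append channel rule enabled (the full publishing and subscribing examples are shown in the sections below):
+
+```javascript
+// Create the response message, then append tokens to it as they arrive
+const { serials: [serial] } = await channel.publish('response', 'Hello');
+channel.appendMessage(serial, ' world');
+channel.appendMessage(serial, '!');
+
+// Later, channel history contains one compacted message per response
+const page = await channel.history();
+console.log(page.items[0].data); // 'Hello world!'
+```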
-This approach is ideal when: +You do not need to mark the message or token stream as completed; the final message content will automatically include the full response constructed from all appended tokens. -- You want each complete AI response stored as a single message in history. -- You want clients joining mid-stream to catch up efficiently without processing thousands of - individual tokens. -- Your application displays progressive AI responses that build up over time. - -### How it works - -1. **Initial message**: When an AI response begins, publish an initial message with `message.create` - action to the Ably channel with an empty or the first token as content. -2. **Token streaming**: Append subsequent tokens to the original message by publishing those tokens - with the `message.append` action. -3. **Live Delivery**: Clients subscribed to the channel receive each appended token in realtime, allowing - them to progressively render the response. -4. **Compacted history**: The channel history contains only one message per AI response, - which includes all tokens appended to it concatenated together. + -You do not need to mark the message or token stream as completed; the final message will -automatically have the full response with all tokens appended to it. +## Enable appends -## Setup +Message append functionality requires the "Message annotations, updates, and deletes" [channel rule](/docs/channels#rules) enabled for your channel or [namespace](/docs/channels#namespaces). -Message append functionality requires the "Message annotations, updates, and deletes" [channel rule](/docs/channels#rules) enabled for your channel or [namespace](/docs/channels#namespaces). This rule automatically enables message persistence. + To enable the channel rule: @@ -50,11 +37,7 @@ To enable the channel rule: 5. Select the "Message annotations, updates, and deletes" rule from the list. 6. Click "Create channel rule". -The examples in this guide use the `ai:` namespace prefix, which assumes you have configured the rule for `ai:*`. - -### Message size limits - -Standard Ably message [size limits](/docs/platform/pricing/limits#message) apply to the complete concatenated message. The system validates size limits before accepting append operations. If appending a token would exceed the maximum message size, the append is rejected. +The examples on this page use the `ai:` namespace prefix, which assumes you have configured the rule for `ai:*`. ## Publishing tokens From 2be2502b1a54b3d144eb7133ef49e7de68fdeeac Mon Sep 17 00:00:00 2001 From: Mike Christensen Date: Tue, 16 Dec 2025 21:15:42 +0000 Subject: [PATCH 05/13] ai-transport: refine Publishing section Refine the Publishing section of the message-per-response docs. 
- Include anchor tags on title - Describe the `serial` identifier - Align with stream pattern used in message-per-token docs - Remove duplicate example --- .../token-streaming/message-per-response.mdx | 73 +++++++------------ 1 file changed, 26 insertions(+), 47 deletions(-) diff --git a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx index 8d5cd2ffbb..591e8e59bf 100644 --- a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx +++ b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx @@ -39,15 +39,11 @@ To enable the channel rule: The examples on this page use the `ai:` namespace prefix, which assumes you have configured the rule for `ai:*`. -## Publishing tokens +## Publishing tokens -Publish tokens from a [Realtime](/docs/api/realtime-sdk) client, which maintains a -persistent connection to the Ably service. This allows you to publish at very high message rates -with the lowest possible latencies, while preserving guarantees around message delivery order. -For more information, see [Realtime and REST](/docs/basics#realtime-and-rest). +Publish tokens from a [Realtime](/docs/api/realtime-sdk) client, which maintains a persistent connection to the Ably service. This allows you to publish at very high message rates with the lowest possible latencies, while preserving guarantees around message delivery order. For more information, see [Realtime and REST](/docs/basics#realtime-and-rest). -[Channels](/docs/channels) separate message traffic into different topics. -For token streaming, each conversation or session typically has its own channel. +[Channels](/docs/channels) separate message traffic into different topics. For token streaming, each conversation or session typically has its own channel. Use the [`get()`](/docs/api/realtime-sdk/channels#get) method to create or retrieve a channel instance: @@ -57,66 +53,49 @@ const channel = realtime.channels.get('ai:{{RANDOM_CHANNEL_NAME}}'); ``` -To start streaming an AI response, publish the initial message. Then append each subsequent token -to that message as it arrives from the AI model: +To start streaming an AI response, publish the initial message. The message is identified by a server-assigned identifier called a [`serial`](/docs/messages#properties). Use the `serial` to append each subsequent token to the message as it arrives from the AI model: ```javascript -// Example: stream yields string tokens like 'Hello', ' world', '!' - // Publish initial message and capture the serial for appending tokens const { serials: [msgSerial] } = await channel.publish('response', { data: '' }); -for await (const token of stream) { +// Example: stream returns events like { type: 'token', text: 'Hello' } +for await (const event of stream) { // Append each token as it arrives - channel.appendMessage(msgSerial, token); + if (event.type === 'token') { + channel.appendMessage(msgSerial, event.text); + } } ``` -When publishing tokens, don't await the `channel.appendMessage()` call. Ably rolls up acknowledgments -and debounces them for efficiency, which means awaiting each append would unnecessarily slow down -your token stream. Messages are still published in the order that `appendMessage()` is called, so delivery -order is not affected. - -Append only supports concatenating data of the same type as the original message. 
For example, if -the initial message data is a string, all appended tokens must also be strings. If the initial -message data is binary, all appended tokens must be binary. - -This pattern allows publishing append operations for multiple concurrent model responses on the same -channel. As long as you append to the correct message serial, tokens from different responses will -not interfere with each other, and the final concatenated message for each response will contain only the tokens -from that response. - -### Complete publish example - -The following example shows how to stream an AI response, publishing the first token as the initial message and appending subsequent tokens: +When publishing tokens, don't await the `channel.appendMessage()` call. Ably rolls up acknowledgments and debounces them for efficiency, which means awaiting each append would unnecessarily slow down your token stream. Messages are still published in the order that `appendMessage()` is called, so delivery order is not affected. ```javascript -const realtime = new Ably.Realtime('{{API_KEY}}'); -const channel = realtime.channels.get('ai:responses'); - -async function streamAIResponse(prompt) { - // Example: stream yields string tokens like 'Hello', ' world', '!' - const stream = await getAIModelStream(prompt); - - let messageSerial; +// ✅ Do this - append without await for maximum throughput +for await (const event of stream) { + if (event.type === 'token') { + channel.appendMessage(msgSerial, event.text); + } +} - for await (const token of stream) { - if (!messageSerial) { - // First token: create the message and get serial - const response = await channel.publish('ai-response', token); - messageSerial = response.serials[0]; - } else { - // Subsequent tokens: append without awaiting - channel.appendMessage(messageSerial, token); - } +// ❌ Don't do this - awaiting each append reduces throughput +for await (const event of stream) { + if (event.type === 'token') { + await channel.appendMessage(msgSerial, event.text); } } ``` + + +This pattern allows publishing append operations for multiple concurrent model responses on the same channel. As long as you append to the correct message serial, tokens from different responses will not interfere with each other, and the final concatenated message for each response will contain only the tokens from that response. + ## Subscribing to token streams Subscribers receive different message actions depending on when they join and how they're retrieving From a94068864053efe695cea301f8b9947f772deb7c Mon Sep 17 00:00:00 2001 From: Mike Christensen Date: Tue, 16 Dec 2025 21:33:21 +0000 Subject: [PATCH 06/13] ai-transport: refine Subscribing section Refine the Subscribing section of the message-per-response docs. 
- Add anchor tag to heading - Describes each action upfront - Uses RANDOM_CHANNEL_NAME --- .../token-streaming/message-per-response.mdx | 30 ++++++++----------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx index 591e8e59bf..986be27747 100644 --- a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx +++ b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx @@ -96,46 +96,42 @@ Append only supports concatenating data of the same type as the original message This pattern allows publishing append operations for multiple concurrent model responses on the same channel. As long as you append to the correct message serial, tokens from different responses will not interfere with each other, and the final concatenated message for each response will contain only the tokens from that response. -## Subscribing to token streams +## Subscribing to token streams -Subscribers receive different message actions depending on when they join and how they're retrieving -messages. +Subscribers receive different message actions depending on when they join and how they're retrieving messages. Each message has an `action` field that indicates how to process it, and a `serial` field that identifies which message the action relates to: -When subscribed to a channel, clients receive the initial message with the `message.create` action, -followed by each token as a `message.append` action in realtime. +- `message.create`: Indicates a new response has started (i.e. a new message was created). The message `data` contains the initial content (often empty or the first token). Store this as the beginning of a new response using `serial` as the identifier. +- `message.append`: Contains a single token fragment to append. The message `data` contains only the new token, not the full concatenated response. Append this token to the existing response identified by `serial`. +- `message.update`: Contains the complete response up to that point. The message `data` contains the full concatenated text so far. Replace the entire response content with this data for the message identified by `serial`. This action occurs when the channel needs to resynchronize the full message state, such as after a client [resumes](/docs/connect/states#resume) from a transient disconnection. 
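+
+The following example tracks one response per `serial` and applies each action as it arrives: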

```javascript
-const channel = realtime.channels.get('ai:responses');
+const channel = realtime.channels.get('ai:{{RANDOM_CHANNEL_NAME}}');

// Track responses by message serial
const responses = new Map();

-await channel.subscribe((msg) => {
-  switch (msg.action) {
+// Subscribe to live messages (implicitly attaches the channel)
+await channel.subscribe((message) => {
+  switch (message.action) {
    case 'message.create':
      // New response started
-      responses.set(msg.serial, msg.data);
+      responses.set(message.serial, message.data);
      break;
    case 'message.append':
      // Append token to existing response
-      const current = responses.get(msg.serial) || '';
-      responses.set(msg.serial, current + msg.data);
+      const current = responses.get(message.serial) || '';
+      responses.set(message.serial, current + message.data);
      break;
    case 'message.update':
      // Replace entire response content
-      responses.set(msg.serial, msg.data);
+      responses.set(message.serial, message.data);
      break;
  }
});
```

-Each `message.append` event contains only the new token fragment in `msg.data`, not the full
-concatenated response.
-
-Occasionally you may receive a `message.update` action, which indicates that the channel needs to stream the entire message data so far. For example, this can happen if the client [resumes](/docs/connect/states#resume) after a transient disconnection and the channel needs to resynchronize the full message state. In this case, `msg.data` contains the complete response up to that point. For `message.update` events, you should replace the entire response content.
-
## Client hydration

From a2b812324fc8204313913115e8534f02b392eedf Mon Sep 17 00:00:00 2001
From: Mike Christensen
Date: Tue, 16 Dec 2025 21:59:01 +0000
Subject: [PATCH 07/13] ai-transport: refine rewind section

Refine the rewind section of the message-per-response docs.

- Include description of allowed rewind parameters
- Tweak copy
---
 .../token-streaming/message-per-response.mdx | 40 +++++++++++--------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx
index 986be27747..3492381796 100644
--- a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx
+++ b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx
@@ -134,20 +134,17 @@

## Client hydration

-Clients joining a channel or recovering from disconnection can efficiently catchup using rewind or
-history. For temporary disconnections, Ably's automatic [connection recovery](docs/connect/states#connection-state-recovery)
-ensures that clients receive all missed tokens in order.
+When clients connect or reconnect, such as after a page refresh, they often need to catch up on complete responses and individual tokens that were published while they were offline or before they joined.

-By using either rewind or history with `untilAttach`, clients can efficiently hydrate the existing
-response state without needing to process every individual token. Both rewind and history deliver
-concatenated responses as `message.update` events and seamlessly transition from historical
-responses to live `message.append` events. 
+The message per response pattern enables efficient client state hydration without needing to process every individual token and supports seamlessly transitioning from historical responses to live tokens. -### Using rewind + -[Rewind](/docs/channels/options/rewind) attaches to a channel starting from a point in the past, delivering complete concatenated -messages as `message.update` events. +### Using rewind for recent history +The simplest approach is to use Ably's [rewind](/docs/channels/options/rewind) channel option to attach to the channel at some point in the recent past, and automatically receive all messages since that point. Historical messages are delivered as `message.update` events containing the complete concatenated response, which then seamlessly transition to live `message.append` events for any ongoing responses: ```javascript @@ -159,26 +156,37 @@ const channel = realtime.channels.get('ai:{{RANDOM_CHANNEL_NAME}}', { // Track responses by message serial const responses = new Map(); -await channel.subscribe((msg) => { - switch (msg.action) { +// Subscribe to receive both recent historical and live messages, +// which are delivered in order to the subscription +await channel.subscribe((message) => { + switch (message.action) { case 'message.create': // New response started - responses.set(msg.serial, msg.data); + responses.set(message.serial, message.data); break; case 'message.append': // Append token to existing response - const current = responses.get(msg.serial) || ''; - responses.set(msg.serial, current + msg.data); + const current = responses.get(message.serial) || ''; + responses.set(message.serial, current + message.data); break; case 'message.update': // Replace entire response content - responses.set(msg.serial, msg.data); + responses.set(message.serial, message.data); break; } }); ``` +Rewind supports two formats: + +- **Time-based**: Use a time interval like `'30s'` or `'2m'` to retrieve messages from that time period +- **Count-based**: Use a number like `10` or `50` to retrieve the most recent N messages (maximum 100) + + + ### Using history with untilAttach The `untilAttach` option provides [continuous history](/docs/storage-history/history#continuous-history) From 8272217ca194e7bdaff9784db6dcd33a53a4408c Mon Sep 17 00:00:00 2001 From: Mike Christensen Date: Tue, 16 Dec 2025 22:08:26 +0000 Subject: [PATCH 08/13] ai-transport/token-streaming: refine history Refines the history section for the message-per-response docs. - Adds anchor to heading - Uses RANDOM_CHANNEL_NAME - Use message serial in code snippet instead of ID - Tweaks copy --- .../token-streaming/message-per-response.mdx | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx index 3492381796..2b68f9a3b3 100644 --- a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx +++ b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx @@ -187,29 +187,32 @@ Rewind supports two formats: At most 100 messages will be retrieved in a rewind request. If more messages exist within the specified interval, only the most recent 100 are sent. 
-### Using history with untilAttach +### Using history for older messages -The `untilAttach` option provides [continuous history](/docs/storage-history/history#continuous-history) -from the point of attachment backward: +Use [channel history](/docs/storage-history/history) with the [`untilAttach` option](/docs/storage-history/history#continuous-history) to paginate back through history to obtain historical responses, while preserving continuity with the delivery of live tokens: ```javascript -const channel = realtime.channels.get('ai:responses'); +const channel = realtime.channels.get('ai:{{RANDOM_CHANNEL_NAME}}'); +// Track responses by message serial const responses = new Map(); // Subscribe to live messages (implicitly attaches the channel) -await channel.subscribe((msg) => { - switch (msg.action) { +await channel.subscribe((message) => { + switch (message.action) { case 'message.create': - responses.set(msg.id, msg.data); + // New response started + responses.set(message.serial, message.data); break; case 'message.append': - const current = responses.get(msg.id) || ''; - responses.set(msg.id, current + msg.data); + // Append token to existing response + const current = responses.get(message.serial) || ''; + responses.set(message.serial, current + message.data); break; case 'message.update': - responses.set(msg.id, msg.data); + // Replace entire response content + responses.set(message.serial, message.data); break; } }); @@ -222,7 +225,7 @@ while (page) { // Messages are newest-first for (const message of page.items) { // message.data contains the full concatenated text - responses.set(message.id, message.data); + responses.set(message.serial, message.data); } // Move to next page if available From a03e17a08a507065d95c20ed57da7cf704d540bb Mon Sep 17 00:00:00 2001 From: Mike Christensen Date: Tue, 16 Dec 2025 22:52:17 +0000 Subject: [PATCH 09/13] ai-transport/token-streaming: in-progress rewind Fix the hydration of in progress responses via rewind by using the responseId in the extras to correlate messages with completed responses loaded from the database. --- .../token-streaming/message-per-response.mdx | 76 ++++++++++++++++--- 1 file changed, 67 insertions(+), 9 deletions(-) diff --git a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx index 2b68f9a3b3..29f3141b9b 100644 --- a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx +++ b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx @@ -234,46 +234,104 @@ while (page) { ``` -### Hydrating an in-progress response +### Hydrating an in-progress response -A common pattern is to persist completed responses in your database while using Ably for streaming in-progress responses. When clients reconnect, they load completed responses from your database first, then use Ably to catch up on any response that was still in progress. +A common pattern is to persist complete model responses in your database while using Ably for streaming in-progress responses. + +The client loads completed responses from your database, then uses Ably to catch up on any response that was still in progress. + +You can hydrate in-progress responses using either the [rewind](#rewind) or [history](#history) pattern. 
+ +#### Publishing with correlation metadata + +To correlate Ably messages with your database records, include the `responseId` in the message [extras](/docs/messages#properties) when publishing: + + +```javascript +// Publish initial message with responseId in extras +const { serials: [msgSerial] } = await channel.publish({ + name: 'response', + data: '', + extras: { + headers: { + responseId: 'resp_abc123' // Your database response ID + } + } +}); + +// Append tokens, including extras to preserve headers +for await (const event of stream) { + if (event.type === 'token') { + channel.appendMessage(msgSerial, event.text, { + extras: { + headers: { + responseId: 'resp_abc123' + } + } + }); + } +} +``` + + + #### Hydrate using rewind -Load completed responses from your database, then use rewind to catch up on any in-progress response, skipping messages for responses already loaded: +When hydrating, load completed responses from your database, then use rewind to catch up on any in-progress response. Check the `responseId` from message extras to skip responses already loaded from your database: ```javascript // Load completed responses from your database +// completedResponses is a Set of responseIds const completedResponses = await loadResponsesFromDatabase(); +// Use rewind to receive recent historical messages const channel = realtime.channels.get('ai:responses', { params: { rewind: '2m' } }); -await channel.subscribe((msg) => { - const responseId = msg.extras?.headers?.responseId; +// Track in-progress responses by responseId +const inProgressResponses = new Map(); + +await channel.subscribe((message) => { + const responseId = message.extras?.headers?.responseId; + + if (!responseId) { + console.warn('Message missing responseId'); + return; + } // Skip messages for responses already loaded from database if (completedResponses.has(responseId)) { return; } - switch (msg.action) { + switch (message.action) { case 'message.create': - displayNewResponse(msg.data, responseId); + // New response started + inProgressResponses.set(responseId, message.data); break; case 'message.append': - appendToResponse(msg.data, responseId); + // Append token to existing response + const current = inProgressResponses.get(responseId) || ''; + inProgressResponses.set(responseId, current + message.data); break; case 'message.update': - replaceResponse(msg.data, responseId); + // Replace entire response content + inProgressResponses.set(responseId, message.data); break; } }); ``` + + #### Hydrate using history Load completed responses from your database, then use history to catch up on any in-progress response: From 16b8be6491b4043b361ca0a58d052a0762680979 Mon Sep 17 00:00:00 2001 From: Mike Christensen Date: Tue, 16 Dec 2025 22:54:15 +0000 Subject: [PATCH 10/13] ai-transport/token-streaming: in progress history Fix the hydration of in progress responses using history by obtaining the timestamp of the last completed response loaded from the database and paginating history forwards from that point. 
--- .../token-streaming/message-per-response.mdx | 65 ++++++++++++++----- 1 file changed, 47 insertions(+), 18 deletions(-) diff --git a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx index 29f3141b9b..a8318bde1e 100644 --- a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx +++ b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx @@ -334,54 +334,83 @@ Alternatively, instead of including `responseId` in message extras, you could st #### Hydrate using history -Load completed responses from your database, then use history to catch up on any in-progress response: +Load completed responses from your database, then use [channel history](/docs/storage-history/history) with the [`untilAttach` option](/docs/storage-history/history#continuous-history) to catch up on any in-progress responses. Use the timestamp of the last completed response to start pagination from that point forward, ensuring continuity with live message delivery. ```javascript -// Load completed responses from your database +// Load completed responses from database (sorted by timestamp, oldest first) const completedResponses = await loadResponsesFromDatabase(); -const channel = realtime.channels.get('ai:responses'); +// Get the timestamp of the latest completed response +const latestTimestamp = completedResponses.latest().timestamp; + +const channel = realtime.channels.get('ai:{{RANDOM_CHANNEL_NAME}}'); + +// Track in progress responses by ID +const inProgressResponses = new Map(); // Subscribe to live messages (implicitly attaches) -await channel.subscribe((msg) => { - const responseId = msg.extras?.headers?.responseId; +await channel.subscribe((message) => { + const responseId = message.extras?.headers?.responseId; + + if (!responseId) { + console.warn('Message missing responseId'); + return; + } // Skip messages for responses already loaded from database if (completedResponses.has(responseId)) { return; } - switch (msg.action) { + switch (message.action) { case 'message.create': - displayNewResponse(msg.data, responseId); + // New response started + inProgressResponses.set(responseId, message.data); break; case 'message.append': - appendToResponse(msg.data, responseId); + // Append token to existing response + const current = inProgressResponses.get(responseId) || ''; + inProgressResponses.set(responseId, current + message.data); break; case 'message.update': - replaceResponse(msg.data, responseId); + // Replace entire response content + inProgressResponses.set(responseId, message.data); break; } }); -// Fetch history for any in-progress response -const historyPage = await channel.history({ untilAttach: true }); +// Fetch history from the last completed response until attachment +let page = await channel.history({ + untilAttach: true, + start: latestTimestamp, + direction: 'forwards' +}); -for (const msg of historyPage.items) { - const responseId = msg.extras?.headers?.responseId; +// Paginate through all missed messages +while (page) { + for (const message of page.items) { + const responseId = message.extras?.headers?.responseId; - // Skip responses already loaded from database - if (completedResponses.has(responseId)) { - continue; + if (!responseId) { + console.warn('Message missing responseId'); + continue; + } + + // message.data contains the full concatenated text so far + inProgressResponses.set(responseId, message.data); } - // msg.data contains the full 
concatenated text so far - displayFullResponse(msg.data, responseId); + // Move to next page if available + page = page.hasNext() ? await page.next() : null; } ``` + + ## Headers and metadata Use the `extras.headers` field to attach metadata to your messages. Headers are useful for correlating Ably messages with external systems, such as your database IDs or AI model request identifiers. From 913ad5969f9bdeb33054c431c448ce7040dc82dd Mon Sep 17 00:00:00 2001 From: Mike Christensen Date: Tue, 16 Dec 2025 22:57:04 +0000 Subject: [PATCH 11/13] ai-transport/token-streaming: remove metadata Removes the headers/metadata section, as this covers the specific semantics of extras.headers handling with appends, which is better addressed by the (upcoming) message append pub/sub docs. Instead, a callout is used to describe header mixin semantics in the appropriate place insofar as it relates to the discussion at hand. --- .../token-streaming/message-per-response.mdx | 139 ------------------ 1 file changed, 139 deletions(-) diff --git a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx index a8318bde1e..b91bf42ae1 100644 --- a/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx +++ b/src/pages/docs/ai-transport/features/token-streaming/message-per-response.mdx @@ -410,142 +410,3 @@ while (page) { - -## Headers and metadata - -Use the `extras.headers` field to attach metadata to your messages. Headers are useful for correlating Ably messages with external systems, such as your database IDs or AI model request identifiers. - -### Header superseding behavior - -When you include headers in an append operation, they completely replace all previous headers on the message. This "last write wins" behavior means you must include all headers you want to retain with each append that specifies headers. 
- - -```javascript -// Initial message with headers -const response = await channel.publish({ - name: 'ai-response', - data: 'Hello', - extras: { - headers: { - responseId: 'resp_123', - model: 'gpt-4' - } - } -}); - -// Append without headers - previous headers are retained -channel.appendMessage(response.serials[0], ' world'); -// Message headers: { responseId: 'resp_123', model: 'gpt-4' } - -// Append with headers - completely replaces previous headers -channel.appendMessage(response.serials[0], '!', { - extras: { - headers: { - responseId: 'resp_123', - model: 'gpt-4', - tokensUsed: '15' - } - } -}); -// Message headers: { responseId: 'resp_123', model: 'gpt-4', tokensUsed: '15' } -``` - - -A common pattern is to include static metadata in the initial message, then add completion metadata with the final append: - - -```javascript -async function streamWithMetadata(prompt) { - const stream = await getAIModelStream(prompt); - let messageSerial; - let tokenCount = 0; - - for await (const token of stream) { - tokenCount++; - if (!messageSerial) { - // First token: include static metadata - const response = await channel.publish({ - name: 'ai-response', - data: token, - extras: { - headers: { - responseId: prompt.responseId, - model: prompt.model - } - } - }); - messageSerial = response.serials[0]; - } else { - // Subsequent tokens: append without headers - channel.appendMessage(messageSerial, token); - } - } - - // Final append: include completion metadata - channel.appendMessage(messageSerial, '', { - extras: { - headers: { - responseId: prompt.responseId, - model: prompt.model, - tokensUsed: String(tokenCount), - completedAt: new Date().toISOString() - } - } - }); -} -``` - - -### Metadata best practices - -Do not include metadata in the body of an append request. Instead, use the `extras.headers` field to -keep metadata separate from the message content. This ensures that clients can easily process the -concatenated response without needing to parse out metadata. - - -```javascript -// ✓ GOOD: Metadata in headers -const response = await channel.publish({ - data: 'The response text', // Pure concatenated text - extras: { - headers: { - model: 'gpt-4', - } - } -}); - -// ✗ BAD: Mixing metadata with content -const response = await channel.publish({ - data: JSON.stringify({ // Don't do this - text: 'The response text', - model: 'gpt-4', - }) -}); -``` - - -By including metadata in the body of the message, the final concatenated response would contain all -the metadata from each append, making it difficult to extract the pure response text. - -For example, if you appended tokens with metadata in the body, the final message data would look -like this: - -```json -{ - "text": "Hello", - "model": "gpt-4", -}{ - "text": " world", - "model": "gpt-4", -}{ - "text": "!", - "model": "gpt-4", -} -``` - -If you use headers for metadata, and the body only contains the response text, the final message -data would be simply: - -```text -Hello world! -``` From c9c9aa77c08a59a3ee60ca374337355f49db2763 Mon Sep 17 00:00:00 2001 From: Mike Christensen Date: Tue, 16 Dec 2025 22:59:01 +0000 Subject: [PATCH 12/13] ai-transport/token-streaming: add resume callout Update the token streaming with message per token docs to include a callout describing resume behaviour in case of transient disconnection. 
--- .../features/token-streaming/message-per-token.mdx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pages/docs/ai-transport/features/token-streaming/message-per-token.mdx b/src/pages/docs/ai-transport/features/token-streaming/message-per-token.mdx index 7e0f48e794..21683449b3 100644 --- a/src/pages/docs/ai-transport/features/token-streaming/message-per-token.mdx +++ b/src/pages/docs/ai-transport/features/token-streaming/message-per-token.mdx @@ -235,6 +235,10 @@ await channel.subscribe('stop', (message) => { When clients connect or reconnect, such as after a page refresh, they often need to catch up on tokens that were published while they were offline or before they joined. Ably provides several approaches to hydrate client state depending on your application's requirements. + + From 5438a583fac27e1c293d162ca79235104a2eb9d8 Mon Sep 17 00:00:00 2001 From: Mike Christensen Date: Tue, 16 Dec 2025 23:00:07 +0000 Subject: [PATCH 13/13] ai-transport/token-streaming: headers Fix the message per token docs headers to include anchors and align with naming in the message per response page. --- .../features/token-streaming/message-per-token.mdx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pages/docs/ai-transport/features/token-streaming/message-per-token.mdx b/src/pages/docs/ai-transport/features/token-streaming/message-per-token.mdx index 21683449b3..71e7fdaf5a 100644 --- a/src/pages/docs/ai-transport/features/token-streaming/message-per-token.mdx +++ b/src/pages/docs/ai-transport/features/token-streaming/message-per-token.mdx @@ -245,7 +245,7 @@ If you need to retrieve and process large amounts of historical data, consider u ### Using rewind for recent history -The simplest approach is to use Ably's [rewind](/docs/channels/options/rewind) channel option to automatically retrieve recent tokens when attaching to a channel: +The simplest approach is to use Ably's [rewind](/docs/channels/options/rewind) channel option to attach to the channel at some point in the recent past, and automatically receive all tokens since that point: ```javascript @@ -276,7 +276,7 @@ At most 100 messages will be retrieved in a rewind request. If more messages exi By default, rewind is limited to the last 2 minutes of messages. This is usually sufficient for scenarios where clients need only recent context, such as for continuous token streaming, or when the response stream from a given model request does not exceed 2 minutes. If you need more than 2 minutes of history, see [Using history for longer persistence](#history). -### Using history for longer persistence +### Using history for older messages For applications that need to retrieve tokens beyond the 2-minute rewind window, enable [persistence](/docs/storage-history/storage#all-message-persistence) on your channel. Use [channel history](/docs/storage-history/history) with the [`untilAttach` option](/docs/storage-history/history#continuous-history) to paginate back through history to obtain historical tokens, while preserving continuity with the delivery of live tokens: @@ -309,7 +309,7 @@ while (page) { ``` -### Hydrating an in-progress live response +### Hydrating an in-progress response A common pattern is to persist complete model responses in your database while using Ably for live token delivery of the in-progress response.