7 changes: 4 additions & 3 deletions crates/llm/AGENTS.md
@@ -59,15 +59,16 @@ Trait-based LLM client implementations for multiple providers.
     - when converting assistant tool calls into provider requests, the function `arguments` include the original tool-call `id` under the `_id` field
 - chat messages are an enum of `UserMessage`, `AssistantMessage`, `SystemMessage`, and `ToolMessage`, each with only relevant fields
   - `AssistantMessage` holds a `Vec<AssistantPart>` for text, tool calls, and thinking segments
+  - each assistant part carries optional `encrypted_content`; GeminiRust forwards it to and restores it from the Gemini `thought_signature`
   - tool calls include an `id` string, assigned locally when missing
   - tool messages carry the same `id` and store results via `JsonResult` (`content` or `error`)
 - Chat message, request, and response types serialize to and from JSON
   - skips serializing fields that are `None`, empty strings, or empty arrays
 - Responses
-  - `ResponseChunk` is an enum of `Thinking`, `ToolCall`, `Content`, `Usage`, or `Done`
+  - `ResponseChunk` emits `Part(AssistantPart)` items alongside `Usage` and `Done`
   - usage chunks carry `input_tokens` and `output_tokens`
-  - tool call chunks hold a single `ToolCall` and repeat as needed
-  - thinking, tool calls, and content stream first, followed by optional usage then `Done`
+  - streaming text and thinking segments arrive as assistant parts; consecutive parts without `encrypted_content` are merged while segments with encrypted data remain isolated
+  - tool call parts stream as single `AssistantPart::ToolCall` values
 - OpenAiChat client converts assistant history messages with tool calls into request `tool_calls` and stitches streaming tool call deltas into complete tool calls
 - OpenAiChat client parses `reasoning_content` from streamed responses into thinking text
 - Tool orchestration
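The merge rule documented above is the subtlest part of the new streaming contract, so a sketch helps. The following is illustrative only: `push_merged` is a hypothetical helper, not code from this PR, and the `llm` crate path is an assumption; only the `AssistantPart` shape comes from crates/llm/src/lib.rs below. Thinking parts would merge the same way as text parts.

use llm::AssistantPart; // assumed crate path

// Hypothetical accumulator for streamed parts: a plain text part extends a
// plain text tail, while any part carrying `encrypted_content` (and every
// tool call) starts a new, isolated entry.
fn push_merged(acc: &mut Vec<AssistantPart>, next: AssistantPart) {
    let tail_is_plain_text = matches!(
        acc.last(),
        Some(AssistantPart::Text { encrypted_content: None, .. })
    );
    match next {
        AssistantPart::Text { text, encrypted_content: None } if tail_is_plain_text => {
            // Extend the existing tail instead of pushing a second text part.
            if let Some(AssistantPart::Text { text: tail, .. }) = acc.last_mut() {
                tail.push_str(&text);
            }
        }
        other => acc.push(other),
    }
}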
50 changes: 34 additions & 16 deletions crates/llm/src/gemini_rust.rs
@@ -66,21 +66,27 @@ impl LlmClient for GeminiRustClient {
                 let mut parts_vec: Vec<Part> = Vec::new();
                 for part in a.content {
                     match part {
-                        AssistantPart::Text { text } => {
+                        AssistantPart::Text {
+                            text,
+                            encrypted_content,
+                        } => {
                             parts_vec.push(Part::Text {
                                 text,
                                 thought: None,
-                                thought_signature: None,
+                                thought_signature: encrypted_content,
                             });
                         }
-                        AssistantPart::ToolCall(tc) => {
-                            let args = match tc.arguments {
-                                JsonResult::Content { .. } => tc.arguments_content_with_id(),
+                        AssistantPart::ToolCall {
+                            call,
+                            encrypted_content,
+                        } => {
+                            let args = match call.arguments {
+                                JsonResult::Content { .. } => call.arguments_content_with_id(),
                                 JsonResult::Error { .. } => Value::Null,
                             };
                             parts_vec.push(Part::FunctionCall {
-                                function_call: gemini_rust::FunctionCall::new(tc.name, args),
-                                thought_signature: None,
+                                function_call: gemini_rust::FunctionCall::new(call.name, args),
+                                thought_signature: encrypted_content,
                             });
                         }
                         AssistantPart::Thinking { .. } => {}
@@ -170,24 +176,36 @@ impl LlmClient for GeminiRustClient {
                    Part::Text {
                        text,
                        thought,
-                        thought_signature: _,
+                        thought_signature,
                    } => {
+                        let encrypted_content = thought_signature.clone();
                        if thought.unwrap_or(false) {
-                            out.push(Ok(ResponseChunk::Thinking(text.clone())));
+                            out.push(Ok(ResponseChunk::Part(
+                                AssistantPart::Thinking {
+                                    text: text.clone(),
+                                    encrypted_content,
+                                },
+                            )));
                        } else if !text.is_empty() {
-                            out.push(Ok(ResponseChunk::Content(text.clone())));
+                            out.push(Ok(ResponseChunk::Part(AssistantPart::Text {
+                                text: text.clone(),
+                                encrypted_content,
+                            })));
                        }
                    }
                    Part::FunctionCall {
                        function_call,
-                        thought_signature: _,
+                        thought_signature,
                    } => {
-                        out.push(Ok(ResponseChunk::ToolCall(ToolCall {
-                            id: Uuid::new_v4().to_string(),
-                            name: function_call.name.clone(),
-                            arguments: JsonResult::Content {
-                                content: function_call.args.clone(),
+                        out.push(Ok(ResponseChunk::Part(AssistantPart::ToolCall {
+                            call: ToolCall {
+                                id: Uuid::new_v4().to_string(),
+                                name: function_call.name.clone(),
+                                arguments: JsonResult::Content {
+                                    content: function_call.args.clone(),
+                                },
                            },
+                            encrypted_content: thought_signature.clone(),
                        })));
                    }
                    _ => {}
55 changes: 37 additions & 18 deletions crates/llm/src/harmony.rs
@@ -120,29 +120,29 @@ fn build_prompt(
        ChatMessage::Assistant(a) => {
            for part in &a.content {
                match part {
-                    AssistantPart::Thinking { text } => {
+                    AssistantPart::Thinking { text, .. } => {
                        convo_msgs.push(
                            Message::from_role_and_content(Role::Assistant, text.clone())
                                .with_channel("analysis"),
                        );
                    }
-                    AssistantPart::Text { text } => {
+                    AssistantPart::Text { text, .. } => {
                        convo_msgs.push(
                            Message::from_role_and_content(Role::Assistant, text.clone())
                                .with_channel("final"),
                        );
                    }
-                    AssistantPart::ToolCall(tc) => {
-                        let args = match &tc.arguments {
+                    AssistantPart::ToolCall { call, .. } => {
+                        let args = match &call.arguments {
                            JsonResult::Content { .. } => {
-                                tc.arguments_content_with_id().to_string()
+                                call.arguments_content_with_id().to_string()
                            }
                            JsonResult::Error { error } => error.clone(),
                        };
                        convo_msgs.push(
                            Message::from_role_and_content(Role::Assistant, args)
                                .with_channel("commentary")
-                                .with_recipient(format!("functions.{}", tc.name))
+                                .with_recipient(format!("functions.{}", call.name))
                                .with_content_type("<|constrain|>json"),
                        );
                    }
@@ -303,9 +303,17 @@ impl LlmClient for HarmonyClient {
                if !delta.is_empty() && parser.current_recipient().is_none() {
                    match parser.current_channel().as_deref() {
                        Some("analysis") => {
-                            out.push(Ok(ResponseChunk::Thinking(delta)))
+                            out.push(Ok(ResponseChunk::Part(AssistantPart::Thinking {
+                                text: delta,
+                                encrypted_content: None,
+                            })))
                        }
+                        Some("final") => {
+                            out.push(Ok(ResponseChunk::Part(AssistantPart::Text {
+                                text: delta,
+                                encrypted_content: None,
+                            })))
+                        }
-                        Some("final") => out.push(Ok(ResponseChunk::Content(delta))),
                        _ => {}
                    }
                }
@@ -328,10 +336,13 @@
                                error: text.clone(),
                            },
                        };
-                        out.push(Ok(ResponseChunk::ToolCall(ToolCall {
-                            id: Uuid::new_v4().to_string(),
-                            name: name.to_string(),
-                            arguments,
+                        out.push(Ok(ResponseChunk::Part(AssistantPart::ToolCall {
+                            call: ToolCall {
+                                id: Uuid::new_v4().to_string(),
+                                name: name.to_string(),
+                                arguments,
+                            },
+                            encrypted_content: None,
                        })));
                    }
                }
@@ -393,6 +404,7 @@ mod tests {
            ChatMessage::Assistant(AssistantMessage {
                content: vec![AssistantPart::Thinking {
                    text: "ponder".into(),
+                    encrypted_content: None,
                }],
            }),
        ]);
@@ -424,9 +436,11 @@
                content: vec![
                    AssistantPart::Thinking {
                        text: "ponder".into(),
+                        encrypted_content: None,
                    },
                    AssistantPart::Text {
                        text: "Hello".into(),
+                        encrypted_content: None,
                    },
                ],
            }),
@@ -435,9 +449,11 @@
                content: vec![
                    AssistantPart::Thinking {
                        text: "think".into(),
+                        encrypted_content: None,
                    },
                    AssistantPart::Text {
                        text: "I'm good".into(),
+                        encrypted_content: None,
                    },
                ],
            }),
@@ -459,13 +475,16 @@
        let (_, prompt, prefill_tokens, _) = setup(vec![
            ChatMessage::user("2+2?".into()),
            ChatMessage::Assistant(AssistantMessage {
-                content: vec![AssistantPart::ToolCall(ToolCall {
-                    id: "1".into(),
-                    name: "add".into(),
-                    arguments: JsonResult::Content {
-                        content: json!({"a": 2, "b": 2}),
+                content: vec![AssistantPart::ToolCall {
+                    call: ToolCall {
+                        id: "1".into(),
+                        name: "add".into(),
+                        arguments: JsonResult::Content {
+                            content: json!({"a": 2, "b": 2}),
+                        },
                    },
-                })],
+                    encrypted_content: None,
+                }],
            }),
            ChatMessage::tool(
                "1".into(),
28 changes: 21 additions & 7 deletions crates/llm/src/lib.rs
@@ -25,7 +25,10 @@ impl ChatMessage {

    pub fn assistant(content: String) -> Self {
        Self::Assistant(AssistantMessage {
-            content: vec![AssistantPart::Text { text: content }],
+            content: vec![AssistantPart::Text {
+                text: content,
+                encrypted_content: None,
+            }],
        })
    }

@@ -106,9 +109,22 @@ impl ToolCall {
 #[derive(Clone, Debug, Serialize, Deserialize)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum AssistantPart {
-    Text { text: String },
-    ToolCall(ToolCall),
-    Thinking { text: String },
+    Text {
+        text: String,
+        #[serde(default, skip_serializing_if = "Option::is_none")]
+        encrypted_content: Option<String>,
+    },
+    ToolCall {
+        #[serde(flatten)]
+        call: ToolCall,
+        #[serde(default, skip_serializing_if = "Option::is_none")]
+        encrypted_content: Option<String>,
+    },
+    Thinking {
+        text: String,
+        #[serde(default, skip_serializing_if = "Option::is_none")]
+        encrypted_content: Option<String>,
+    },
 }

 #[derive(Clone, Debug, Serialize, Deserialize)]
@@ -225,9 +241,7 @@ pub fn client_from(

 #[derive(Debug, Clone)]
 pub enum ResponseChunk {
-    Thinking(String),
-    ToolCall(ToolCall),
-    Content(String),
+    Part(AssistantPart),
    Usage {
        input_tokens: u32,
        output_tokens: u32,
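Given the serde attributes above (`tag = "type"`, `rename_all = "snake_case"`, a flattened `call`, and `skip_serializing_if` on `encrypted_content`), the wire format of a part follows mechanically. Below is a small check of that shape, assuming the crate is importable as `llm` and `serde_json` is available as a dev-dependency; the string values are made up:

use llm::AssistantPart; // assumed crate path
use serde_json::json;

#[test]
fn assistant_part_wire_format() {
    // A `None` encrypted_content is skipped, and the variant tag lands in "type".
    let plain = AssistantPart::Text {
        text: "hi".into(),
        encrypted_content: None,
    };
    assert_eq!(
        serde_json::to_value(&plain).unwrap(),
        json!({ "type": "text", "text": "hi" })
    );

    // A present signature serializes next to the text and survives a round trip.
    let signed = AssistantPart::Thinking {
        text: "pondering".into(),
        encrypted_content: Some("opaque-signature".into()),
    };
    let value = serde_json::to_value(&signed).unwrap();
    assert_eq!(
        value,
        json!({
            "type": "thinking",
            "text": "pondering",
            "encrypted_content": "opaque-signature"
        })
    );
    let restored: AssistantPart = serde_json::from_value(value).unwrap();
    assert!(matches!(
        restored,
        AssistantPart::Thinking { encrypted_content: Some(_), .. }
    ));
}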
27 changes: 18 additions & 9 deletions crates/llm/src/ollama.rs
@@ -57,24 +57,24 @@ impl LlmClient for OllamaClient {
                    OllamaChatMessage::new(OllamaMessageRole::Assistant, String::new());
                for part in a.content {
                    match part {
-                        AssistantPart::Text { text } => {
+                        AssistantPart::Text { text, .. } => {
                            msg.content.push_str(&text);
                        }
-                        AssistantPart::ToolCall(tc) => {
-                            let args = match tc.arguments {
+                        AssistantPart::ToolCall { call, .. } => {
+                            let args = match call.arguments {
                                JsonResult::Content { .. } => {
-                                    tc.arguments_content_with_id()
+                                    call.arguments_content_with_id()
                                }
                                JsonResult::Error { .. } => Value::Null,
                            };
                            msg.tool_calls.push(OllamaToolCall {
                                function: OllamaToolCallFunction {
-                                    name: tc.name,
+                                    name: call.name,
                                    arguments: args,
                                },
                            });
                        }
-                        AssistantPart::Thinking { text } => {
+                        AssistantPart::Thinking { text, .. } => {
                            let thinking = msg.thinking.get_or_insert_with(String::new);
                            thinking.push_str(&text);
                        }
@@ -126,7 +126,10 @@
        let mut out: Vec<Result<ResponseChunk, Box<dyn Error + Send + Sync>>> = Vec::new();
        if !r.message.thinking.clone().unwrap_or_default().is_empty() {
            if let Some(thinking) = r.message.thinking.clone() {
-                out.push(Ok(ResponseChunk::Thinking(thinking)));
+                out.push(Ok(ResponseChunk::Part(AssistantPart::Thinking {
+                    text: thinking,
+                    encrypted_content: None,
+                })));
            }
        }
        let tool_calls: Vec<ToolCall> = r
@@ -142,10 +145,16 @@
            })
            .collect();
        for tc in tool_calls {
-            out.push(Ok(ResponseChunk::ToolCall(tc)));
+            out.push(Ok(ResponseChunk::Part(AssistantPart::ToolCall {
+                call: tc,
+                encrypted_content: None,
+            })));
        }
        if !r.message.content.is_empty() {
-            out.push(Ok(ResponseChunk::Content(r.message.content)));
+            out.push(Ok(ResponseChunk::Part(AssistantPart::Text {
+                text: r.message.content,
+                encrypted_content: None,
+            })));
        }
        if r.done {
            if let Some(f) = r.final_data.as_ref() {
21 changes: 15 additions & 6 deletions crates/llm/src/openai_chat.rs
@@ -77,9 +77,9 @@ impl LlmClient for OpenAiChatClient {
                    let mut tool_calls_acc: Vec<ToolCall> = Vec::new();
                    for part in a.content {
                        match part {
-                            AssistantPart::Text { text } => content_acc.push_str(&text),
-                            AssistantPart::Thinking { text } => thinking_acc.push_str(&text),
-                            AssistantPart::ToolCall(tc) => tool_calls_acc.push(tc),
+                            AssistantPart::Text { text, .. } => content_acc.push_str(&text),
+                            AssistantPart::Thinking { text, .. } => thinking_acc.push_str(&text),
+                            AssistantPart::ToolCall { call, .. } => tool_calls_acc.push(call),
                        }
                    }
                    if !content_acc.is_empty() {
@@ -255,13 +255,22 @@
            None
        };
        if !thinking_acc.is_empty() {
-            out.push(Ok(ResponseChunk::Thinking(thinking_acc)));
+            out.push(Ok(ResponseChunk::Part(AssistantPart::Thinking {
+                text: thinking_acc,
+                encrypted_content: None,
+            })));
        }
        for tc in tool_calls {
-            out.push(Ok(ResponseChunk::ToolCall(tc)));
+            out.push(Ok(ResponseChunk::Part(AssistantPart::ToolCall {
+                call: tc,
+                encrypted_content: None,
+            })));
        }
        if !content_acc.is_empty() {
-            out.push(Ok(ResponseChunk::Content(content_acc)));
+            out.push(Ok(ResponseChunk::Part(AssistantPart::Text {
+                text: content_acc,
+                encrypted_content: None,
+            })));
        }
        if let Some((input_tokens, output_tokens)) = usage {
            out.push(Ok(ResponseChunk::Usage {
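With the three content-bearing variants collapsed into `ResponseChunk::Part`, consumers match on the part itself instead of on `Thinking`/`ToolCall`/`Content`. Here is a hypothetical consumer-side sketch, assuming the `llm` crate path and the `Done` variant documented in AGENTS.md; the printing stands in for real processing:

use llm::{AssistantPart, ResponseChunk};

// Illustrative handler: `encrypted_content` is deliberately ignored here,
// since it only matters when the history is sent back to the provider.
fn handle(chunk: ResponseChunk) {
    match chunk {
        ResponseChunk::Part(AssistantPart::Thinking { text, .. }) => {
            eprintln!("[thinking] {text}");
        }
        ResponseChunk::Part(AssistantPart::Text { text, .. }) => {
            print!("{text}");
        }
        ResponseChunk::Part(AssistantPart::ToolCall { call, .. }) => {
            eprintln!("[tool call] {}", call.name);
        }
        ResponseChunk::Usage { input_tokens, output_tokens, .. } => {
            eprintln!("[usage] {input_tokens} in, {output_tokens} out");
        }
        ResponseChunk::Done => {}
    }
}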