diff --git a/examples/google_thinking_example.rs b/examples/google_thinking_example.rs
new file mode 100644
index 0000000..bc6829f
--- /dev/null
+++ b/examples/google_thinking_example.rs
@@ -0,0 +1,145 @@
+// Example demonstrating Google Gemini's thinking/reasoning capabilities.
+// Gemini 2.5 and 3 series models support thinking for improved reasoning.
+use llm::{
+    builder::{LLMBackend, LLMBuilder},
+    chat::ChatMessage,
+};
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Get the Google API key from the environment
+    let api_key = std::env::var("GOOGLE_API_KEY").unwrap_or("google-key".into());
+
+    // Example 1: Using a thinking budget (Gemini 2.5 models)
+    // thinking_budget controls how many tokens the model may spend on reasoning:
+    // - a positive value sets a fixed budget (128-32768 for Pro, 0-24576 for Flash)
+    // - -1 requests dynamic thinking (the model decides based on complexity)
+    // - 0 disables thinking (on supported models)
+    println!("=== Example 1: Gemini 2.5 with Thinking Budget ===\n");
+
+    let llm_with_budget = LLMBuilder::new()
+        .backend(LLMBackend::Google)
+        .api_key(&api_key)
+        .model("gemini-2.5-flash")
+        .max_tokens(4096)
+        .temperature(0.7)
+        .reasoning_budget_tokens(1024) // Set a specific thinking budget
+        .reasoning(true) // Include thought summaries in the response
+        .build()
+        .expect("Failed to build LLM (Google)");
+
+    let messages = vec![ChatMessage::user()
+        .content("What is the sum of the first 50 prime numbers? Please show your reasoning.")
+        .build()];
+
+    match llm_with_budget.chat(&messages).await {
+        Ok(response) => {
+            // Check whether the response carries thought summaries
+            if let Some(thinking) = response.thinking() {
+                println!("Thought Summary:\n{}\n", thinking);
+            }
+            if let Some(text) = response.text() {
+                println!("Answer:\n{}\n", text);
+            }
+        }
+        Err(e) => eprintln!("Chat error: {e}"),
+    }
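+
+    // Variant (sketch, not exercised below): on models that support it, a
+    // budget of 0 turns thinking off entirely, trading reasoning depth for
+    // latency. Built the same way as `llm_with_budget` above.
+    let _llm_no_thinking = LLMBuilder::new()
+        .backend(LLMBackend::Google)
+        .api_key(&api_key)
+        .model("gemini-2.5-flash")
+        .reasoning_budget_tokens(0) // 0 = disable thinking (supported models)
+        .build()
+        .expect("Failed to build LLM (Google)");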
+ "#) + .build()]; + + match llm_with_level.chat(&complex_problem).await { + Ok(response) => { + if let Some(thinking) = response.thinking() { + println!("Thought Summary:\n{}\n", thinking); + } + if let Some(text) = response.text() { + println!("Answer:\n{}\n", text); + } + // Show usage information including thinking tokens + if let Some(usage) = response.usage() { + println!("Token Usage:"); + println!(" Prompt tokens: {}", usage.prompt_tokens); + println!(" Completion tokens: {}", usage.completion_tokens); + println!(" Total tokens: {}", usage.total_tokens); + } + } + Err(e) => eprintln!("Chat error: {e}"), + } + + // Example 3: Combining thinking with structured output + println!("\n=== Example 3: Thinking with Structured Output ===\n"); + + let schema = r#" + { + "name": "solution", + "schema": { + "type": "object", + "properties": { + "alice_house": { "type": "string" }, + "alice_pet": { "type": "string" }, + "bob_house": { "type": "string" }, + "bob_pet": { "type": "string" }, + "carol_house": { "type": "string" }, + "carol_pet": { "type": "string" }, + "explanation": { "type": "string" } + }, + "required": ["alice_house", "alice_pet", "bob_house", "bob_pet", "carol_house", "carol_pet", "explanation"] + } + } + "#; + let schema: llm::chat::StructuredOutputFormat = serde_json::from_str(schema)?; + + let llm_structured = LLMBuilder::new() + .backend(LLMBackend::Google) + .api_key(&api_key) + .model("gemini-2.5-flash") + .max_tokens(4096) + .reasoning_budget_tokens(2048) + .reasoning(true) + .schema(schema) + .build() + .expect("Failed to build LLM (Google)"); + + match llm_structured.chat(&complex_problem).await { + Ok(response) => { + if let Some(thinking) = response.thinking() { + println!("Thought Summary:\n{}\n", thinking); + } + if let Some(text) = response.text() { + println!("Structured Answer (JSON):\n{}\n", text); + } + } + Err(e) => eprintln!("Chat error: {e}"), + } + + Ok(()) +} diff --git a/src/backends/google.rs b/src/backends/google.rs index 32fbb9d..721d4fd 100644 --- a/src/backends/google.rs +++ b/src/backends/google.rs @@ -87,6 +87,12 @@ pub struct Google { pub json_schema: Option, /// Available tools for function calling pub tools: Option>, + /// Thinking budget tokens (for Gemini 2.5 models) + pub thinking_budget: Option, + /// Thinking level (for Gemini 3 models): "low", "medium", "high", "minimal" + pub thinking_level: Option, + /// Whether to include thought summaries in responses + pub include_thoughts: Option, /// HTTP client for making API requests client: Client, } @@ -153,6 +159,24 @@ struct GoogleGenerationConfig { /// A schema for structured output #[serde(skip_serializing_if = "Option::is_none")] response_schema: Option, + /// Thinking/reasoning configuration + #[serde(skip_serializing_if = "Option::is_none", rename = "thinkingConfig")] + thinking_config: Option, +} + +/// Configuration for thinking/reasoning capabilities +#[derive(Serialize)] +#[serde(rename_all = "camelCase")] +struct GoogleThinkingConfig { + /// Budget for thinking tokens (Gemini 2.5 models) + #[serde(skip_serializing_if = "Option::is_none")] + thinking_budget: Option, + /// Thinking level (Gemini 3 models): "low", "medium", "high", "minimal" + #[serde(skip_serializing_if = "Option::is_none")] + thinking_level: Option, + /// Whether to include thought summaries in the response + #[serde(skip_serializing_if = "Option::is_none")] + include_thoughts: Option, } /// Response from the chat completion API @@ -177,6 +201,9 @@ struct GoogleUsageMetadata { /// Total number of tokens used 
#[serde(rename = "totalTokenCount")] total_token_count: Option, + /// Number of tokens used for thinking/reasoning + #[serde(rename = "thoughtsTokenCount")] + thoughts_token_count: Option, } /// Response from the streaming chat completion API @@ -235,7 +262,10 @@ impl ChatResponse for GoogleChatResponse { fn text(&self) -> Option { self.candidates .first() - .map(|c| c.content.parts.iter().map(|p| p.text.clone()).collect()) + .map(|c| c.content.parts.iter() + .filter(|p| !p.thought) // Exclude thought parts from regular text + .map(|p| p.text.clone()) + .collect()) } fn tool_calls(&self) -> Option> { @@ -291,6 +321,24 @@ impl ChatResponse for GoogleChatResponse { }) } + fn thinking(&self) -> Option { + self.candidates.first().and_then(|c| { + let thoughts: Vec = c + .content + .parts + .iter() + .filter(|p| p.thought && !p.text.is_empty()) + .map(|p| p.text.clone()) + .collect(); + + if thoughts.is_empty() { + None + } else { + Some(thoughts.join("\n")) + } + }) + } + fn usage(&self) -> Option { self.usage_metadata.as_ref().and_then(|metadata| { match (metadata.prompt_token_count, metadata.candidates_token_count) { @@ -318,6 +366,12 @@ struct GoogleResponsePart { /// Function call contained in this part #[serde(rename = "functionCall")] function_call: Option, + /// Whether this part contains a thought summary + #[serde(default)] + thought: bool, + /// Thought signature for maintaining reasoning context across turns + #[serde(rename = "thoughtSignature")] + thought_signature: Option, } /// MIME type of the response @@ -479,6 +533,9 @@ impl Google { /// * `top_k` - Top-k sampling parameter /// * `json_schema` - JSON schema for structured output /// * `tools` - Function tools that the model can use + /// * `thinking_budget` - Budget for thinking tokens (Gemini 2.5 models) + /// * `thinking_level` - Thinking level (Gemini 3 models): "low", "medium", "high", "minimal" + /// * `include_thoughts` - Whether to include thought summaries in responses /// /// # Returns /// @@ -495,6 +552,9 @@ impl Google { top_k: Option, json_schema: Option, tools: Option>, + thinking_budget: Option, + thinking_level: Option, + include_thoughts: Option, ) -> Self { let mut builder = Client::builder(); if let Some(sec) = timeout_seconds { @@ -511,9 +571,68 @@ impl Google { top_k, json_schema, tools, + thinking_budget, + thinking_level, + include_thoughts, client: builder.build().expect("Failed to build reqwest Client"), } } + + /// Creates the generation config for API requests + fn build_generation_config(&self) -> Option { + // Check if we have any config to send + if self.max_tokens.is_none() + && self.temperature.is_none() + && self.top_p.is_none() + && self.top_k.is_none() + && self.json_schema.is_none() + && self.thinking_budget.is_none() + && self.thinking_level.is_none() + && self.include_thoughts.is_none() + { + return None; + } + + // Handle structured output schema + let (response_mime_type, response_schema) = if let Some(json_schema) = &self.json_schema { + if let Some(schema) = &json_schema.schema { + // If the schema has an "additionalProperties" field (as required by OpenAI), remove it as Google's API doesn't support it + let mut schema = schema.clone(); + if let Some(obj) = schema.as_object_mut() { + obj.remove("additionalProperties"); + } + (Some(GoogleResponseMimeType::Json), Some(schema)) + } else { + (None, None) + } + } else { + (None, None) + }; + + // Build thinking config if any thinking parameters are set + let thinking_config = if self.thinking_budget.is_some() + || 
 
 /// MIME type of the response
@@ -479,6 +533,9 @@ impl Google {
     /// * `top_k` - Top-k sampling parameter
     /// * `json_schema` - JSON schema for structured output
     /// * `tools` - Function tools that the model can use
+    /// * `thinking_budget` - Budget for thinking tokens (Gemini 2.5 models)
+    /// * `thinking_level` - Thinking level (Gemini 3 models): "minimal", "low", "medium", or "high"
+    /// * `include_thoughts` - Whether to include thought summaries in responses
     ///
     /// # Returns
     ///
@@ -495,6 +552,9 @@
         top_k: Option<u32>,
         json_schema: Option<StructuredOutputFormat>,
         tools: Option<Vec<Tool>>,
+        thinking_budget: Option<i32>,
+        thinking_level: Option<String>,
+        include_thoughts: Option<bool>,
     ) -> Self {
         let mut builder = Client::builder();
         if let Some(sec) = timeout_seconds {
@@ -511,9 +571,68 @@
             top_k,
             json_schema,
             tools,
+            thinking_budget,
+            thinking_level,
+            include_thoughts,
             client: builder.build().expect("Failed to build reqwest Client"),
         }
     }
+
+    /// Creates the generation config for API requests
+    fn build_generation_config(&self) -> Option<GoogleGenerationConfig> {
+        // Check if we have any config to send
+        if self.max_tokens.is_none()
+            && self.temperature.is_none()
+            && self.top_p.is_none()
+            && self.top_k.is_none()
+            && self.json_schema.is_none()
+            && self.thinking_budget.is_none()
+            && self.thinking_level.is_none()
+            && self.include_thoughts.is_none()
+        {
+            return None;
+        }
+
+        // Handle the structured output schema. Google's API doesn't need the schema
+        // to have a "name" field, so the schema can be used directly.
+        let (response_mime_type, response_schema) = if let Some(json_schema) = &self.json_schema {
+            if let Some(schema) = &json_schema.schema {
+                // If the schema has an "additionalProperties" field (as required by
+                // OpenAI), remove it, as Google's API doesn't support it
+                let mut schema = schema.clone();
+                if let Some(obj) = schema.as_object_mut() {
+                    obj.remove("additionalProperties");
+                }
+                (Some(GoogleResponseMimeType::Json), Some(schema))
+            } else {
+                (None, None)
+            }
+        } else {
+            (None, None)
+        };
+
+        // Build the thinking config if any thinking parameter is set
+        let thinking_config = if self.thinking_budget.is_some()
+            || self.thinking_level.is_some()
+            || self.include_thoughts.is_some()
+        {
+            Some(GoogleThinkingConfig {
+                thinking_budget: self.thinking_budget,
+                thinking_level: self.thinking_level.clone(),
+                include_thoughts: self.include_thoughts,
+            })
+        } else {
+            None
+        };
+
+        Some(GoogleGenerationConfig {
+            max_output_tokens: self.max_tokens,
+            temperature: self.temperature,
+            top_p: self.top_p,
+            top_k: self.top_k,
+            response_mime_type,
+            response_schema,
+            thinking_config,
+        })
+    }
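+
+    // Illustrative request fragment (values are examples, not defaults): with
+    // `thinking_budget = Some(1024)` and `include_thoughts = Some(true)`, the
+    // serialized generationConfig gains
+    //   "thinkingConfig": { "thinkingBudget": 1024, "includeThoughts": true }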
 }
 
 #[async_trait]
@@ -601,40 +720,7 @@ impl ChatProvider for Google {
         }
 
         // Remove generation_config if empty to avoid validation errors
-        let generation_config = if self.max_tokens.is_none()
-            && self.temperature.is_none()
-            && self.top_p.is_none()
-            && self.top_k.is_none()
-            && self.json_schema.is_none()
-        {
-            None
-        } else {
-            // If json_schema and json_schema.schema are not None, use json_schema.schema as the response schema and set response_mime_type to JSON
-            // Google's API doesn't need the schema to have a "name" field, so we can just use the schema directly.
-            let (response_mime_type, response_schema) = if let Some(json_schema) = &self.json_schema
-            {
-                if let Some(schema) = &json_schema.schema {
-                    // If the schema has an "additionalProperties" field (as required by OpenAI), remove it as Google's API doesn't support it
-                    let mut schema = schema.clone();
-                    if let Some(obj) = schema.as_object_mut() {
-                        obj.remove("additionalProperties");
-                    }
-                    (Some(GoogleResponseMimeType::Json), Some(schema))
-                } else {
-                    (None, None)
-                }
-            } else {
-                (None, None)
-            };
-            Some(GoogleGenerationConfig {
-                max_output_tokens: self.max_tokens,
-                temperature: self.temperature,
-                top_p: self.top_p,
-                top_k: self.top_k,
-                response_mime_type,
-                response_schema,
-            })
-        };
+        let generation_config = self.build_generation_config();
 
         let req_body = GoogleChatRequest {
             contents: chat_contents,
@@ -776,36 +862,7 @@
         });
 
         // Build generation config
-        let generation_config = {
-            // If json_schema and json_schema.schema are not None, use json_schema.schema as the response schema and set response_mime_type to JSON
-            // Google's API doesn't need the schema to have a "name" field, so we can just use the schema directly.
-            let (response_mime_type, response_schema) = if let Some(json_schema) = &self.json_schema
-            {
-                if let Some(schema) = &json_schema.schema {
-                    // If the schema has an "additionalProperties" field (as required by OpenAI), remove it as Google's API doesn't support it
-                    let mut schema = schema.clone();
-
-                    if let Some(obj) = schema.as_object_mut() {
-                        obj.remove("additionalProperties");
-                    }
-
-                    (Some(GoogleResponseMimeType::Json), Some(schema))
-                } else {
-                    (None, None)
-                }
-            } else {
-                (None, None)
-            };
-
-            Some(GoogleGenerationConfig {
-                max_output_tokens: self.max_tokens,
-                temperature: self.temperature,
-                top_p: self.top_p,
-                top_k: self.top_k,
-                response_mime_type,
-                response_schema,
-            })
-        };
+        let generation_config = self.build_generation_config();
 
         let req_body = GoogleChatRequest {
             contents: chat_contents,
@@ -941,22 +998,7 @@
             },
         });
         }
-        let generation_config = if self.max_tokens.is_none()
-            && self.temperature.is_none()
-            && self.top_p.is_none()
-            && self.top_k.is_none()
-        {
-            None
-        } else {
-            Some(GoogleGenerationConfig {
-                max_output_tokens: self.max_tokens,
-                temperature: self.temperature,
-                top_p: self.top_p,
-                top_k: self.top_k,
-                response_mime_type: None,
-                response_schema: None,
-            })
-        };
+        let generation_config = self.build_generation_config();
 
         let req_body = GoogleChatRequest {
             contents: chat_contents,
diff --git a/src/builder.rs b/src/builder.rs
index 92033b1..6d52dc5 100644
--- a/src/builder.rs
+++ b/src/builder.rs
@@ -847,6 +847,10 @@ impl LLMBuilder {
                     LLMError::InvalidRequest("No API key provided for Google".to_string())
                 })?;
 
+                // Convert reasoning_budget_tokens (u32) to thinking_budget (i32); the API
+                // also accepts -1 (dynamic) and 0 (off), but a u32 source yields only >= 0.
+                let thinking_budget = self.reasoning_budget_tokens.map(|b| b as i32);
+
                 let google = crate::backends::google::Google::new(
                     api_key,
                     self.model,
@@ -858,6 +862,9 @@
                     self.top_k,
                     self.json_schema,
                     tools,
+                    thinking_budget,
+                    self.reasoning_effort, // Used as thinking_level for Gemini 3 models
+                    self.reasoning,        // Used as include_thoughts
                 );
                 Box::new(google)
             }