diff --git a/examples/google_thinking_example.rs b/examples/google_thinking_example.rs
new file mode 100644
index 0000000..bc6829f
--- /dev/null
+++ b/examples/google_thinking_example.rs
@@ -0,0 +1,145 @@
+// Example demonstrating Google Gemini's thinking/reasoning capabilities.
+// Gemini 2.5 and 3 series models support thinking for improved reasoning.
+use llm::{
+    builder::{LLMBackend, LLMBuilder},
+    chat::ChatMessage,
+};
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Get the Google API key from the environment
+    let api_key = std::env::var("GOOGLE_API_KEY").unwrap_or("google-key".into());
+
+    // Example 1: Using a thinking budget (Gemini 2.5 models)
+    // thinking_budget controls how many tokens the model may spend on reasoning:
+    // - a positive value sets a fixed budget (128-32768 for Pro, 0-24576 for Flash)
+    // - -1 requests dynamic thinking (the model decides based on complexity)
+    // - 0 disables thinking (on supported models)
+    println!("=== Example 1: Gemini 2.5 with Thinking Budget ===\n");
+
+    let llm_with_budget = LLMBuilder::new()
+        .backend(LLMBackend::Google)
+        .api_key(&api_key)
+        .model("gemini-2.5-flash")
+        .max_tokens(4096)
+        .temperature(0.7)
+        .reasoning_budget_tokens(1024) // Set a specific thinking budget
+        .reasoning(true) // Include thought summaries in the response
+        .build()
+        .expect("Failed to build LLM (Google)");
+
+    let messages = vec![ChatMessage::user()
+        .content("What is the sum of the first 50 prime numbers? Please show your reasoning.")
+        .build()];
+
+    match llm_with_budget.chat(&messages).await {
+        Ok(response) => {
+            // Check whether the response carries thought summaries
+            if let Some(thinking) = response.thinking() {
+                println!("Thought Summary:\n{}\n", thinking);
+            }
+            if let Some(text) = response.text() {
+                println!("Answer:\n{}\n", text);
+            }
+        }
+        Err(e) => eprintln!("Chat error: {e}"),
+    }
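+
+    // Variant (sketch, not exercised below): on models that support it, a
+    // budget of 0 turns thinking off entirely, trading reasoning depth for
+    // latency. Built the same way as `llm_with_budget` above.
+    let _llm_no_thinking = LLMBuilder::new()
+        .backend(LLMBackend::Google)
+        .api_key(&api_key)
+        .model("gemini-2.5-flash")
+        .reasoning_budget_tokens(0) // 0 = disable thinking (supported models)
+        .build()
+        .expect("Failed to build LLM (Google)");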
+ "#) + .build()]; + + match llm_with_level.chat(&complex_problem).await { + Ok(response) => { + if let Some(thinking) = response.thinking() { + println!("Thought Summary:\n{}\n", thinking); + } + if let Some(text) = response.text() { + println!("Answer:\n{}\n", text); + } + // Show usage information including thinking tokens + if let Some(usage) = response.usage() { + println!("Token Usage:"); + println!(" Prompt tokens: {}", usage.prompt_tokens); + println!(" Completion tokens: {}", usage.completion_tokens); + println!(" Total tokens: {}", usage.total_tokens); + } + } + Err(e) => eprintln!("Chat error: {e}"), + } + + // Example 3: Combining thinking with structured output + println!("\n=== Example 3: Thinking with Structured Output ===\n"); + + let schema = r#" + { + "name": "solution", + "schema": { + "type": "object", + "properties": { + "alice_house": { "type": "string" }, + "alice_pet": { "type": "string" }, + "bob_house": { "type": "string" }, + "bob_pet": { "type": "string" }, + "carol_house": { "type": "string" }, + "carol_pet": { "type": "string" }, + "explanation": { "type": "string" } + }, + "required": ["alice_house", "alice_pet", "bob_house", "bob_pet", "carol_house", "carol_pet", "explanation"] + } + } + "#; + let schema: llm::chat::StructuredOutputFormat = serde_json::from_str(schema)?; + + let llm_structured = LLMBuilder::new() + .backend(LLMBackend::Google) + .api_key(&api_key) + .model("gemini-2.5-flash") + .max_tokens(4096) + .reasoning_budget_tokens(2048) + .reasoning(true) + .schema(schema) + .build() + .expect("Failed to build LLM (Google)"); + + match llm_structured.chat(&complex_problem).await { + Ok(response) => { + if let Some(thinking) = response.thinking() { + println!("Thought Summary:\n{}\n", thinking); + } + if let Some(text) = response.text() { + println!("Structured Answer (JSON):\n{}\n", text); + } + } + Err(e) => eprintln!("Chat error: {e}"), + } + + Ok(()) +} diff --git a/src/backends/google.rs b/src/backends/google.rs index 32fbb9d..721d4fd 100644 --- a/src/backends/google.rs +++ b/src/backends/google.rs @@ -87,6 +87,12 @@ pub struct Google { pub json_schema: Option, /// Available tools for function calling pub tools: Option>, + /// Thinking budget tokens (for Gemini 2.5 models) + pub thinking_budget: Option, + /// Thinking level (for Gemini 3 models): "low", "medium", "high", "minimal" + pub thinking_level: Option, + /// Whether to include thought summaries in responses + pub include_thoughts: Option, /// HTTP client for making API requests client: Client, } @@ -153,6 +159,24 @@ struct GoogleGenerationConfig { /// A schema for structured output #[serde(skip_serializing_if = "Option::is_none")] response_schema: Option, + /// Thinking/reasoning configuration + #[serde(skip_serializing_if = "Option::is_none", rename = "thinkingConfig")] + thinking_config: Option, +} + +/// Configuration for thinking/reasoning capabilities +#[derive(Serialize)] +#[serde(rename_all = "camelCase")] +struct GoogleThinkingConfig { + /// Budget for thinking tokens (Gemini 2.5 models) + #[serde(skip_serializing_if = "Option::is_none")] + thinking_budget: Option, + /// Thinking level (Gemini 3 models): "low", "medium", "high", "minimal" + #[serde(skip_serializing_if = "Option::is_none")] + thinking_level: Option, + /// Whether to include thought summaries in the response + #[serde(skip_serializing_if = "Option::is_none")] + include_thoughts: Option, } /// Response from the chat completion API @@ -177,6 +201,9 @@ struct GoogleUsageMetadata { /// Total number of tokens used 
#[serde(rename = "totalTokenCount")] total_token_count: Option, + /// Number of tokens used for thinking/reasoning + #[serde(rename = "thoughtsTokenCount")] + thoughts_token_count: Option, } /// Response from the streaming chat completion API @@ -235,7 +262,10 @@ impl ChatResponse for GoogleChatResponse { fn text(&self) -> Option { self.candidates .first() - .map(|c| c.content.parts.iter().map(|p| p.text.clone()).collect()) + .map(|c| c.content.parts.iter() + .filter(|p| !p.thought) // Exclude thought parts from regular text + .map(|p| p.text.clone()) + .collect()) } fn tool_calls(&self) -> Option> { @@ -291,6 +321,24 @@ impl ChatResponse for GoogleChatResponse { }) } + fn thinking(&self) -> Option { + self.candidates.first().and_then(|c| { + let thoughts: Vec = c + .content + .parts + .iter() + .filter(|p| p.thought && !p.text.is_empty()) + .map(|p| p.text.clone()) + .collect(); + + if thoughts.is_empty() { + None + } else { + Some(thoughts.join("\n")) + } + }) + } + fn usage(&self) -> Option { self.usage_metadata.as_ref().and_then(|metadata| { match (metadata.prompt_token_count, metadata.candidates_token_count) { @@ -318,6 +366,12 @@ struct GoogleResponsePart { /// Function call contained in this part #[serde(rename = "functionCall")] function_call: Option, + /// Whether this part contains a thought summary + #[serde(default)] + thought: bool, + /// Thought signature for maintaining reasoning context across turns + #[serde(rename = "thoughtSignature")] + thought_signature: Option, } /// MIME type of the response @@ -479,6 +533,9 @@ impl Google { /// * `top_k` - Top-k sampling parameter /// * `json_schema` - JSON schema for structured output /// * `tools` - Function tools that the model can use + /// * `thinking_budget` - Budget for thinking tokens (Gemini 2.5 models) + /// * `thinking_level` - Thinking level (Gemini 3 models): "low", "medium", "high", "minimal" + /// * `include_thoughts` - Whether to include thought summaries in responses /// /// # Returns /// @@ -495,6 +552,9 @@ impl Google { top_k: Option, json_schema: Option, tools: Option>, + thinking_budget: Option, + thinking_level: Option, + include_thoughts: Option, ) -> Self { let mut builder = Client::builder(); if let Some(sec) = timeout_seconds { @@ -511,9 +571,68 @@ impl Google { top_k, json_schema, tools, + thinking_budget, + thinking_level, + include_thoughts, client: builder.build().expect("Failed to build reqwest Client"), } } + + /// Creates the generation config for API requests + fn build_generation_config(&self) -> Option { + // Check if we have any config to send + if self.max_tokens.is_none() + && self.temperature.is_none() + && self.top_p.is_none() + && self.top_k.is_none() + && self.json_schema.is_none() + && self.thinking_budget.is_none() + && self.thinking_level.is_none() + && self.include_thoughts.is_none() + { + return None; + } + + // Handle structured output schema + let (response_mime_type, response_schema) = if let Some(json_schema) = &self.json_schema { + if let Some(schema) = &json_schema.schema { + // If the schema has an "additionalProperties" field (as required by OpenAI), remove it as Google's API doesn't support it + let mut schema = schema.clone(); + if let Some(obj) = schema.as_object_mut() { + obj.remove("additionalProperties"); + } + (Some(GoogleResponseMimeType::Json), Some(schema)) + } else { + (None, None) + } + } else { + (None, None) + }; + + // Build thinking config if any thinking parameters are set + let thinking_config = if self.thinking_budget.is_some() + || 
 
 /// MIME type of the response
@@ -479,6 +533,9 @@ impl Google {
     /// * `top_k` - Top-k sampling parameter
     /// * `json_schema` - JSON schema for structured output
     /// * `tools` - Function tools that the model can use
+    /// * `thinking_budget` - Budget for thinking tokens (Gemini 2.5 models)
+    /// * `thinking_level` - Thinking level (Gemini 3 models): "minimal", "low", "medium", or "high"
+    /// * `include_thoughts` - Whether to include thought summaries in responses
     ///
     /// # Returns
     ///
@@ -495,6 +552,9 @@
         top_k: Option<u32>,
         json_schema: Option<StructuredOutputFormat>,
         tools: Option<Vec<Tool>>,
+        thinking_budget: Option<i32>,
+        thinking_level: Option<String>,
+        include_thoughts: Option<bool>,
     ) -> Self {
         let mut builder = Client::builder();
         if let Some(sec) = timeout_seconds {
@@ -511,9 +571,68 @@
             top_k,
             json_schema,
             tools,
+            thinking_budget,
+            thinking_level,
+            include_thoughts,
             client: builder.build().expect("Failed to build reqwest Client"),
         }
     }
+
+    /// Creates the generation config for API requests
+    fn build_generation_config(&self) -> Option<GoogleGenerationConfig> {
+        // Check if we have any config to send
+        if self.max_tokens.is_none()
+            && self.temperature.is_none()
+            && self.top_p.is_none()
+            && self.top_k.is_none()
+            && self.json_schema.is_none()
+            && self.thinking_budget.is_none()
+            && self.thinking_level.is_none()
+            && self.include_thoughts.is_none()
+        {
+            return None;
+        }
+
+        // Handle the structured output schema. Google's API doesn't need the schema
+        // to have a "name" field, so the schema can be used directly.
+        let (response_mime_type, response_schema) = if let Some(json_schema) = &self.json_schema {
+            if let Some(schema) = &json_schema.schema {
+                // If the schema has an "additionalProperties" field (as required by
+                // OpenAI), remove it, as Google's API doesn't support it
+                let mut schema = schema.clone();
+                if let Some(obj) = schema.as_object_mut() {
+                    obj.remove("additionalProperties");
+                }
+                (Some(GoogleResponseMimeType::Json), Some(schema))
+            } else {
+                (None, None)
+            }
+        } else {
+            (None, None)
+        };
+
+        // Build the thinking config if any thinking parameter is set
+        let thinking_config = if self.thinking_budget.is_some()
+            || self.thinking_level.is_some()
+            || self.include_thoughts.is_some()
+        {
+            Some(GoogleThinkingConfig {
+                thinking_budget: self.thinking_budget,
+                thinking_level: self.thinking_level.clone(),
+                include_thoughts: self.include_thoughts,
+            })
+        } else {
+            None
+        };
+
+        Some(GoogleGenerationConfig {
+            max_output_tokens: self.max_tokens,
+            temperature: self.temperature,
+            top_p: self.top_p,
+            top_k: self.top_k,
+            response_mime_type,
+            response_schema,
+            thinking_config,
+        })
+    }
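+
+    // Illustrative request fragment (values are examples, not defaults): with
+    // `thinking_budget = Some(1024)` and `include_thoughts = Some(true)`, the
+    // serialized generationConfig gains
+    //   "thinkingConfig": { "thinkingBudget": 1024, "includeThoughts": true }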
 }
 
 #[async_trait]
@@ -601,40 +720,7 @@ impl ChatProvider for Google {
         }
 
         // Remove generation_config if empty to avoid validation errors
-        let generation_config = if self.max_tokens.is_none()
-            && self.temperature.is_none()
-            && self.top_p.is_none()
-            && self.top_k.is_none()
-            && self.json_schema.is_none()
-        {
-            None
-        } else {
-            // If json_schema and json_schema.schema are not None, use json_schema.schema as the response schema and set response_mime_type to JSON
-            // Google's API doesn't need the schema to have a "name" field, so we can just use the schema directly.
-            let (response_mime_type, response_schema) = if let Some(json_schema) = &self.json_schema
-            {
-                if let Some(schema) = &json_schema.schema {
-                    // If the schema has an "additionalProperties" field (as required by OpenAI), remove it as Google's API doesn't support it
-                    let mut schema = schema.clone();
-                    if let Some(obj) = schema.as_object_mut() {
-                        obj.remove("additionalProperties");
-                    }
-                    (Some(GoogleResponseMimeType::Json), Some(schema))
-                } else {
-                    (None, None)
-                }
-            } else {
-                (None, None)
-            };
-            Some(GoogleGenerationConfig {
-                max_output_tokens: self.max_tokens,
-                temperature: self.temperature,
-                top_p: self.top_p,
-                top_k: self.top_k,
-                response_mime_type,
-                response_schema,
-            })
-        };
+        let generation_config = self.build_generation_config();
 
         let req_body = GoogleChatRequest {
             contents: chat_contents,
@@ -776,36 +862,7 @@
         });
 
         // Build generation config
-        let generation_config = {
-            // If json_schema and json_schema.schema are not None, use json_schema.schema as the response schema and set response_mime_type to JSON
-            // Google's API doesn't need the schema to have a "name" field, so we can just use the schema directly.
-            let (response_mime_type, response_schema) = if let Some(json_schema) = &self.json_schema
-            {
-                if let Some(schema) = &json_schema.schema {
-                    // If the schema has an "additionalProperties" field (as required by OpenAI), remove it as Google's API doesn't support it
-                    let mut schema = schema.clone();
-
-                    if let Some(obj) = schema.as_object_mut() {
-                        obj.remove("additionalProperties");
-                    }
-
-                    (Some(GoogleResponseMimeType::Json), Some(schema))
-                } else {
-                    (None, None)
-                }
-            } else {
-                (None, None)
-            };
-
-            Some(GoogleGenerationConfig {
-                max_output_tokens: self.max_tokens,
-                temperature: self.temperature,
-                top_p: self.top_p,
-                top_k: self.top_k,
-                response_mime_type,
-                response_schema,
-            })
-        };
+        let generation_config = self.build_generation_config();
 
         let req_body = GoogleChatRequest {
             contents: chat_contents,
@@ -941,22 +998,7 @@
             },
         });
         }
-        let generation_config = if self.max_tokens.is_none()
-            && self.temperature.is_none()
-            && self.top_p.is_none()
-            && self.top_k.is_none()
-        {
-            None
-        } else {
-            Some(GoogleGenerationConfig {
-                max_output_tokens: self.max_tokens,
-                temperature: self.temperature,
-                top_p: self.top_p,
-                top_k: self.top_k,
-                response_mime_type: None,
-                response_schema: None,
-            })
-        };
+        let generation_config = self.build_generation_config();
 
         let req_body = GoogleChatRequest {
             contents: chat_contents,
diff --git a/src/builder.rs b/src/builder.rs
index 92033b1..6d52dc5 100644
--- a/src/builder.rs
+++ b/src/builder.rs
@@ -847,6 +847,10 @@ impl LLMBuilder {
                     LLMError::InvalidRequest("No API key provided for Google".to_string())
                 })?;
 
+                // Convert reasoning_budget_tokens (u32) to thinking_budget (i32); the API
+                // also accepts -1 (dynamic) and 0 (off), but a u32 source yields only >= 0.
+                let thinking_budget = self.reasoning_budget_tokens.map(|b| b as i32);
+
                 let google = crate::backends::google::Google::new(
                     api_key,
                     self.model,
@@ -858,6 +862,9 @@
                     self.top_k,
                     self.json_schema,
                     tools,
+                    thinking_budget,
+                    self.reasoning_effort, // Used as thinking_level for Gemini 3 models
+                    self.reasoning,        // Used as include_thoughts
                 );
                 Box::new(google)
             }