diff --git a/spoon_ai/agents/base.py b/spoon_ai/agents/base.py
index b19f807..ee57e14 100644
--- a/spoon_ai/agents/base.py
+++ b/spoon_ai/agents/base.py
@@ -257,15 +257,24 @@ async def add_message(
         elif role == "assistant":
             if tool_calls:
                 formatted_tool_calls = [
-                    {
-                        "id": toolcall.id,
-                        "type": "function",
-                        "function": (
-                            toolcall.function.model_dump()
-                            if isinstance(toolcall.function, BaseModel)
-                            else toolcall.function
-                        )
-                    }
+                    (
+                        toolcall.model_dump()
+                        if isinstance(toolcall, BaseModel)
+                        else {
+                            "id": toolcall.id,
+                            "type": "function",
+                            "function": (
+                                toolcall.function.model_dump()
+                                if isinstance(toolcall.function, BaseModel)
+                                else toolcall.function
+                            ),
+                            **(
+                                {"metadata": getattr(toolcall, "metadata", None)}
+                                if getattr(toolcall, "metadata", None) is not None
+                                else {}
+                            ),
+                        }
+                    )
                     for toolcall in tool_calls
                 ]
                 message = Message(
diff --git a/spoon_ai/agents/spoon_react.py b/spoon_ai/agents/spoon_react.py
index 5061cad..30ada7b 100644
--- a/spoon_ai/agents/spoon_react.py
+++ b/spoon_ai/agents/spoon_react.py
@@ -192,6 +192,7 @@ async def run(
         request: Optional[str] = None,
         timeout: Optional[float] = None,
         thinking: bool = False,
+        reasoning_effort: Optional[str] = None,
     ) -> str:
         """Ensure prompts reflect current tools before running."""
         self._refresh_prompts()
@@ -202,4 +203,6 @@
             kwargs["timeout"] = timeout
         if thinking:
             kwargs["thinking"] = True
+        if reasoning_effort is not None:
+            kwargs["reasoning_effort"] = reasoning_effort
         return await super().run(**kwargs)
diff --git a/spoon_ai/agents/spoon_react_skill.py b/spoon_ai/agents/spoon_react_skill.py
index 12f49a1..9869fde 100644
--- a/spoon_ai/agents/spoon_react_skill.py
+++ b/spoon_ai/agents/spoon_react_skill.py
@@ -94,6 +94,7 @@ async def run(
         request: Optional[str] = None,
         timeout: Optional[float] = None,
         thinking: bool = False,
+        reasoning_effort: Optional[str] = None,
     ) -> str:
         """
         Execute agent with per-turn auto skill activation.
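# --- Illustrative aside (not part of the patch) -----------------------------
# A minimal sketch of what the add_message formatting change above preserves,
# assuming the ToolCall/Function models from spoon_ai.schema as extended in
# this diff: dumping the whole pydantic model keeps the new optional
# `metadata` field, so provider hints such as Gemini thought signatures
# survive into agent memory.
from spoon_ai.schema import Function, ToolCall

call = ToolCall(
    id="call_abc",
    type="function",
    function=Function(name="get_weather", arguments='{"city": "Paris"}'),
    metadata={"thought_signature": "c2lnLTEyMw=="},
)
assert call.model_dump()["metadata"] == {"thought_signature": "c2lnLTEyMw=="}
# -----------------------------------------------------------------------------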
@@ -122,6 +123,8 @@ async def _runner(req: Optional[str]) -> str: kwargs["timeout"] = timeout if thinking: kwargs["thinking"] = True + if reasoning_effort is not None: + kwargs["reasoning_effort"] = reasoning_effort return await super(SpoonReactSkill, self).run(**kwargs) return await self._run_with_auto_skills(request, _runner) diff --git a/spoon_ai/agents/toolcall.py b/spoon_ai/agents/toolcall.py index 20ea253..6676659 100644 --- a/spoon_ai/agents/toolcall.py +++ b/spoon_ai/agents/toolcall.py @@ -50,6 +50,7 @@ class ToolCallAgent(ReActAgent): # Track last tool error for higher-level fallbacks last_tool_error: Optional[str] = Field(default=None, exclude=True) + last_reasoning_summary: Optional[str] = Field(default=None, exclude=True) # Reduced default timeout as per user request (blockchain operations will focus on submission) _default_timeout: float = 120.0 @@ -122,6 +123,7 @@ async def think( thinking: bool = False, reasoning_effort: Optional[str] = None, ) -> bool: + self.last_reasoning_summary = None last_role = getattr(self.memory.messages[-1], "role", None) if self.memory.messages else None if self.next_step_prompt and last_role != "user": await self.add_message("user", self.next_step_prompt) @@ -228,6 +230,8 @@ def convert_mcp_tool(tool: MCPTool) -> dict: self.tool_calls = response.tool_calls response_metadata = getattr(response, "metadata", {}) or {} + if isinstance(response_metadata, dict): + self.last_reasoning_summary = response_metadata.get("reasoning") streamed_content = bool( isinstance(response_metadata, dict) and response_metadata.get("streamed_content") @@ -259,10 +263,10 @@ def convert_mcp_tool(tool: MCPTool) -> dict: if response.content and not streamed_content: self.output_queue.put_nowait( build_output_queue_event( - event_type="thinking", + event_type="content", delta=response.content, metadata={ - "phase": "think", + "phase": "progress", "source": "toolcall_agent", }, ) diff --git a/spoon_ai/chat.py b/spoon_ai/chat.py index 44eb411..c34853d 100644 --- a/spoon_ai/chat.py +++ b/spoon_ai/chat.py @@ -824,6 +824,8 @@ def _normalize_tool_request_kwargs(self, kwargs: Dict[str, Any]) -> Dict[str, An output_config = dict(normalized.get("output_config") or {}) output_config["effort"] = anthropic_effort normalized["output_config"] = output_config + if not thinking and self._anthropic_supports_adaptive_thinking(model): + normalized["thinking"] = {"type": "adaptive"} if thinking: if isinstance(thinking, dict): diff --git a/spoon_ai/llm/providers/anthropic_provider.py b/spoon_ai/llm/providers/anthropic_provider.py index 6b7849f..a139bca 100644 --- a/spoon_ai/llm/providers/anthropic_provider.py +++ b/spoon_ai/llm/providers/anthropic_provider.py @@ -346,22 +346,68 @@ def get_cache_metrics(self) -> Dict[str, int]: return self.cache_metrics.copy() @staticmethod - def _normalize_thinking_param(thinking: Any) -> Optional[Dict[str, Any]]: + def _canonical_model_name(model: str) -> str: + normalized = (model or "").strip().lower().replace("_", "-").replace(".", "-") + return normalized.rsplit("/", 1)[-1] + + @classmethod + def _requires_adaptive_thinking(cls, model: str) -> bool: + canonical = cls._canonical_model_name(model) + return canonical.startswith("claude-opus-4-7") + + @staticmethod + def _thinking_enabled(thinking_config: Optional[Dict[str, Any]]) -> bool: + if not isinstance(thinking_config, dict): + return False + return str(thinking_config.get("type") or "").strip().lower() != "disabled" + + @staticmethod + def _tool_choice_forces_tools(tool_choice: Any) -> bool: + if not 
tool_choice: + return False + + if isinstance(tool_choice, str): + return tool_choice.strip().lower() not in {"auto", "none"} + + if isinstance(tool_choice, dict): + return str(tool_choice.get("type") or "").strip().lower() not in {"", "auto", "none"} + + return True + + @classmethod + def _normalize_thinking_param( + cls, + thinking: Any, + *, + model: Optional[str] = None, + output_config: Any = None, + ) -> Optional[Dict[str, Any]]: """Accept a boolean alias but send Anthropic the structured thinking object.""" + requires_adaptive = bool(model) and cls._requires_adaptive_thinking(model) + has_effort = isinstance(output_config, dict) and bool(output_config.get("effort")) + if isinstance(thinking, dict): normalized = dict(thinking) thinking_type = str(normalized.get("type") or "").strip().lower() if thinking_type == "adaptive": return {"type": "adaptive"} + if thinking_type == "disabled": + return normalized + if requires_adaptive or has_effort: + return {"type": "adaptive"} if thinking_type != "disabled": normalized.setdefault("type", "enabled") normalized.setdefault("budget_tokens", 1024) return normalized if thinking is True: + if requires_adaptive or has_effort: + return {"type": "adaptive"} return { "type": "enabled", "budget_tokens": 1024, } + if thinking is None and has_effort: + return {"type": "adaptive"} return None async def chat(self, messages: List[Message], **kwargs) -> LLMResponse: @@ -389,16 +435,22 @@ async def chat(self, messages: List[Message], **kwargs) -> LLMResponse: k: v for k, v in kwargs.items() if k not in ['model', 'max_tokens', 'temperature'] } thinking_config = self._normalize_thinking_param( - extra_request_kwargs.pop("thinking", None) + extra_request_kwargs.pop("thinking", None), + model=model, + output_config=extra_request_kwargs.get("output_config"), ) + thinking_enabled = self._thinking_enabled(thinking_config) + if thinking_enabled: + extra_request_kwargs.pop("top_k", None) request_params = { 'model': model, 'max_tokens': max_tokens, - 'temperature': temperature, 'messages': anthropic_messages, **extra_request_kwargs, } + if not thinking_enabled: + request_params['temperature'] = temperature if thinking_config is not None: request_params['thinking'] = thinking_config @@ -453,16 +505,22 @@ async def chat_stream(self, messages: List[Message],callbacks: Optional[List] = if k not in ['model', 'max_tokens', 'temperature', 'callbacks'] } thinking_config = self._normalize_thinking_param( - extra_request_kwargs.pop("thinking", None) + extra_request_kwargs.pop("thinking", None), + model=model, + output_config=extra_request_kwargs.get("output_config"), ) + thinking_enabled = self._thinking_enabled(thinking_config) + if thinking_enabled: + extra_request_kwargs.pop("top_k", None) request_params = { 'model': model, 'max_tokens': max_tokens, - 'temperature': temperature, 'messages': anthropic_messages, **extra_request_kwargs, } + if not thinking_enabled: + request_params['temperature'] = temperature if thinking_config is not None: request_params['thinking'] = thinking_config @@ -472,6 +530,7 @@ async def chat_stream(self, messages: List[Message],callbacks: Optional[List] = # Process streaming response full_content = "" + full_reasoning = "" chunk_index = 0 finish_reason = None usage_data = None @@ -506,6 +565,30 @@ async def chat_stream(self, messages: List[Message],callbacks: Optional[List] = ) chunk_index += 1 yield response_chunk + elif chunk.type == "content_block_delta" and chunk.delta.type == "thinking_delta": + token = getattr(chunk.delta, "thinking", "") or "" 
+ if not token: + continue + full_reasoning += token + yield LLMResponseChunk( + content=full_reasoning, + delta=token, + provider="anthropic", + model=model, + finish_reason=finish_reason, + tool_calls=[], + usage=usage_data, + metadata={ + "chunk_index": chunk_index, + "chunk_type": chunk.type, + "type": "thinking", + "phase": "think", + "provider": "anthropic", + "channel": "thinking", + }, + chunk_index=chunk_index, + ) + chunk_index += 1 elif chunk.type == "message_start": if hasattr(chunk, 'message') and hasattr(chunk.message, 'usage'): @@ -592,22 +675,34 @@ async def chat_with_tools(self, messages: List[Message], tools: List[Dict], **kw if k not in ['model', 'max_tokens', 'temperature', 'tool_choice', 'output_queue'] } thinking_config = self._normalize_thinking_param( - extra_request_kwargs.pop("thinking", None) + extra_request_kwargs.pop("thinking", None), + model=model, + output_config=extra_request_kwargs.get("output_config"), ) + thinking_enabled = self._thinking_enabled(thinking_config) + if thinking_enabled: + extra_request_kwargs.pop("top_k", None) request_params = { 'model': model, 'max_tokens': max_tokens, - 'temperature': temperature, 'messages': anthropic_messages, 'tools': anthropic_tools, **extra_request_kwargs, } + if not thinking_enabled: + request_params['temperature'] = temperature if thinking_config is not None: request_params['thinking'] = thinking_config # Anthropic expects tool_choice as an object, not a plain string/enum if tool_choice: + if thinking_enabled and self._tool_choice_forces_tools(tool_choice): + logger.warning( + "Anthropic thinking mode does not support forced tool_choice=%r; falling back to auto", + tool_choice, + ) + tool_choice = None if isinstance(tool_choice, str): request_params['tool_choice'] = {"type": tool_choice} elif isinstance(tool_choice, dict): diff --git a/spoon_ai/llm/providers/gemini_provider.py b/spoon_ai/llm/providers/gemini_provider.py index b46d76e..a2ada6c 100644 --- a/spoon_ai/llm/providers/gemini_provider.py +++ b/spoon_ai/llm/providers/gemini_provider.py @@ -34,6 +34,7 @@ import base64 import re from spoon_ai.callbacks.manager import CallbackManager +from spoon_ai.utils.streaming import build_output_queue_event from ..interface import LLMProviderInterface, LLMResponse, ProviderMetadata, ProviderCapability from ..errors import ProviderError, AuthenticationError, RateLimitError, ModelNotFoundError, NetworkError from ..message_utils import drop_orphaned_tool_messages @@ -243,8 +244,12 @@ def _apply_thinking_defaults( if budget_int < min_thinking_budget: budget_int = min_thinking_budget + include_thoughts = getattr(thinking_cfg, "include_thoughts", None) + if include_thoughts is None: + include_thoughts = True + return max_tokens, types.ThinkingConfig( - include_thoughts=getattr(thinking_cfg, "include_thoughts", None), + include_thoughts=include_thoughts, thinking_level=getattr(thinking_cfg, "thinking_level", None), thinking_budget=budget_int, ) @@ -262,7 +267,96 @@ def _apply_thinking_defaults( if thinking_budget_int < min_thinking_budget: thinking_budget_int = min_thinking_budget - return max_tokens, types.ThinkingConfig(thinking_budget=thinking_budget_int) + return max_tokens, types.ThinkingConfig( + thinking_budget=thinking_budget_int, + include_thoughts=True, + ) + + @staticmethod + def _encode_thought_signature(thought_signature: Any) -> Optional[str]: + if thought_signature is None: + return None + if isinstance(thought_signature, str): + return thought_signature + if isinstance(thought_signature, (bytes, bytearray)): + 
return base64.b64encode(bytes(thought_signature)).decode("ascii") + return None + + @staticmethod + def _decode_thought_signature(thought_signature: Any) -> Optional[bytes]: + if thought_signature is None: + return None + if isinstance(thought_signature, bytes): + return thought_signature + if isinstance(thought_signature, bytearray): + return bytes(thought_signature) + if isinstance(thought_signature, str): + try: + return base64.b64decode(thought_signature) + except Exception: + return None + return None + + @classmethod + def _tool_call_from_function_call( + cls, + function_call: Any, + *, + thought_signature: Any = None, + ) -> ToolCall: + arguments_json = json.dumps(function_call.args) if function_call.args else "{}" + encoded_signature = cls._encode_thought_signature(thought_signature) + metadata = {"thought_signature": encoded_signature} if encoded_signature else None + return ToolCall( + id=f"call_{uuid.uuid4().hex[:8]}", + type="function", + function=Function( + name=function_call.name, + arguments=arguments_json, + ), + metadata=metadata, + ) + + @classmethod + def _function_call_part_from_tool_call(cls, tool_call: ToolCall) -> types.Part: + part = types.Part.from_function_call( + name=tool_call.function.name, + args=tool_call.function.get_arguments_dict(), + ) + thought_signature = cls._decode_thought_signature( + getattr(tool_call, "metadata", None) and tool_call.metadata.get("thought_signature") + ) + if thought_signature is not None: + part.thought_signature = thought_signature + return part + + @classmethod + def _extract_stream_parts( + cls, + parts: List[Any], + ) -> tuple[str, str, List[ToolCall]]: + thought_delta = "" + visible_delta = "" + tool_calls: List[ToolCall] = [] + + for part in parts: + text = getattr(part, "text", None) + if text: + if getattr(part, "thought", False): + thought_delta += text + else: + visible_delta += text + + function_call = getattr(part, "function_call", None) + if function_call: + tool_calls.append( + cls._tool_call_from_function_call( + function_call, + thought_signature=getattr(part, "thought_signature", None), + ) + ) + + return thought_delta, visible_delta, tool_calls async def initialize(self, config: Dict[str, Any]) -> None: """Initialize the Gemini provider with configuration.""" @@ -551,11 +645,7 @@ def _convert_messages_for_tools(self, messages: List[Message]) -> tuple[Optional parts.append(types.Part.from_text(text=message.content)) for tool_call in message.tool_calls: - args = tool_call.function.get_arguments_dict() - parts.append(types.Part.from_function_call( - name=tool_call.function.name, - args=args - )) + parts.append(self._function_call_part_from_tool_call(tool_call)) gemini_messages.append(types.Content( role="model", @@ -834,6 +924,7 @@ async def chat_stream(self, messages: List[Message],callbacks: Optional[List] = # Process streaming response full_content = "" + full_reasoning = "" chunk_index = 0 finish_reason = None native_finish_reason = None @@ -853,6 +944,7 @@ async def chat_stream(self, messages: List[Message],callbacks: Optional[List] = ) async for part_response in stream: + thinking_chunk = "" chunk = "" try: if ( @@ -862,19 +954,41 @@ async def chat_stream(self, messages: List[Message],callbacks: Optional[List] = and getattr(part_response.candidates[0].content, "parts", None) ): parts = part_response.candidates[0].content.parts - chunk = "".join( - [p.text for p in parts if getattr(p, "text", None)] - ) + thinking_chunk, chunk, _ = self._extract_stream_parts(parts) except Exception: + thinking_chunk = "" chunk 
= "" # Fallback: some SDK responses expose streaming text via `part_response.text` - if not chunk: + if not thinking_chunk and not chunk: maybe_text = getattr(part_response, "text", None) if isinstance(maybe_text, str): chunk = maybe_text + if thinking_chunk: + full_reasoning += thinking_chunk + yield LLMResponseChunk( + content=full_reasoning, + delta=thinking_chunk, + provider="gemini", + model=model, + finish_reason=None, + tool_calls=[], + usage=usage_data, + metadata={ + "chunk_index": chunk_index, + "type": "thinking", + "phase": "think", + "provider": "gemini", + "channel": "thinking", + }, + chunk_index=chunk_index, + ) + chunk_index += 1 + if not chunk: + if thinking_chunk: + continue continue full_content += chunk @@ -1013,13 +1127,14 @@ async def chat_with_tools(self, messages: List[Message], tools: List[Dict], **kw ) full_content = "" + full_reasoning = "" emitted_chunk_count = 0 tool_calls: List[ToolCall] = [] finish_reason = "stop" native_finish_reason = "stop" async for part_response in stream: - # Incremental text + thinking_text = "" delta_text = "" try: if ( @@ -1028,37 +1143,52 @@ async def chat_with_tools(self, messages: List[Message], tools: List[Dict], **kw and getattr(part_response.candidates[0], "content", None) is not None and getattr(part_response.candidates[0].content, "parts", None) ): - for part in part_response.candidates[0].content.parts: - text = getattr(part, "text", None) - if text: - delta_text += text - function_call = getattr(part, "function_call", None) - if function_call: - arguments_json = json.dumps(function_call.args) if function_call.args else "{}" - tool_calls.append( - ToolCall( - id=f"call_{uuid.uuid4().hex[:8]}", - type="function", - function=Function( - name=function_call.name, - arguments=arguments_json, - ), - ) - ) - finish_reason = "tool_calls" + thinking_text, delta_text, new_tool_calls = self._extract_stream_parts( + part_response.candidates[0].content.parts + ) + if new_tool_calls: + tool_calls.extend(new_tool_calls) + finish_reason = "tool_calls" except Exception: + thinking_text = "" delta_text = "" - if not delta_text: + if not thinking_text and not delta_text: maybe_text = getattr(part_response, "text", None) if isinstance(maybe_text, str): delta_text = maybe_text + if thinking_text: + full_reasoning += thinking_text + try: + output_queue.put_nowait( + build_output_queue_event( + event_type="thinking", + delta=thinking_text, + metadata={ + "phase": "think", + "provider": "gemini", + "channel": "thinking", + }, + ) + ) + except Exception: + pass + if delta_text: full_content += delta_text emitted_chunk_count += 1 try: - output_queue.put_nowait({"content": delta_text}) + output_queue.put_nowait( + build_output_queue_event( + event_type="content", + delta=delta_text, + metadata={ + "provider": "gemini", + "channel": "text", + }, + ) + ) except Exception: pass @@ -1084,6 +1214,7 @@ async def chat_with_tools(self, messages: List[Message], tools: List[Dict], **kw metadata={ "streamed_content": emitted_chunk_count > 0, "stream_chunk_count": emitted_chunk_count, + **({"reasoning": full_reasoning} if full_reasoning else {}), }, ) @@ -1142,6 +1273,8 @@ def _convert_response(self, response, duration: float, *, model: Optional[str] = response_text = "" image_paths = [] tool_calls = [] + reasoning = "" + saw_parts = False # Check if there are candidate results if hasattr(response, "candidates") and response.candidates: @@ -1149,10 +1282,16 @@ def _convert_response(self, response, duration: float, *, model: Optional[str] = if hasattr(candidate, 
"content") and candidate.content: # Iterate through all parts parts = getattr(candidate.content, "parts", None) or [] + saw_parts = bool(parts) for part in parts: # Check if there is text content if hasattr(part, "text") and part.text: - if content: + if getattr(part, "thought", False): + if reasoning: + reasoning += "\n" + part.text + else: + reasoning = part.text + elif content: content += "\n" + part.text else: content = part.text @@ -1205,7 +1344,7 @@ def _convert_response(self, response, duration: float, *, model: Optional[str] = # If no text content was obtained, fall back to response.text (if available). # Some Gemini SDK responses keep `response.text` populated even when # `candidates[0].content.parts` is empty. - if not content: + if not content and not saw_parts and not reasoning: fallback_text = self._safe_get_response_text(response) if fallback_text: response_text = fallback_text @@ -1229,7 +1368,8 @@ def _convert_response(self, response, duration: float, *, model: Optional[str] = duration=duration, metadata={ "image_paths": image_paths, - "has_images": len(image_paths) > 0 + "has_images": len(image_paths) > 0, + **({"reasoning": reasoning} if reasoning else {}), } ) @@ -1238,6 +1378,8 @@ def _convert_tool_response(self, response, duration: float, *, model: Optional[s content = "" tool_calls = [] finish_reason = "stop" + reasoning = "" + saw_parts = False # Check if there are candidate results if hasattr(response, "candidates") and response.candidates: @@ -1245,39 +1387,30 @@ def _convert_tool_response(self, response, duration: float, *, model: Optional[s if hasattr(candidate, "content") and candidate.content: # Iterate through all parts parts = getattr(candidate.content, "parts", None) or [] + saw_parts = bool(parts) for part in parts: # Check if there is text content if hasattr(part, "text") and part.text: - if content: + if getattr(part, "thought", False): + if reasoning: + reasoning += "\n" + part.text + else: + reasoning = part.text + elif content: content += "\n" + part.text else: content = part.text # Check if there is a function call elif hasattr(part, "function_call") and part.function_call: - # Convert Gemini function call to our ToolCall format - function_call = part.function_call - - # Generate a unique ID for the tool call - import uuid - tool_call_id = f"call_{uuid.uuid4().hex[:8]}" - - # Convert arguments to JSON string - import json - arguments_json = json.dumps(function_call.args) if function_call.args else "{}" - - tool_call = ToolCall( - id=tool_call_id, - type="function", - function=Function( - name=function_call.name, - arguments=arguments_json - ) + tool_call = self._tool_call_from_function_call( + part.function_call, + thought_signature=getattr(part, "thought_signature", None), ) tool_calls.append(tool_call) finish_reason = "tool_calls" # If no content was obtained, fall back to response.text (if available). 
- if not content: + if not content and not saw_parts and not reasoning: fallback_text = self._safe_get_response_text(response) if fallback_text: content = fallback_text @@ -1290,7 +1423,9 @@ def _convert_tool_response(self, response, duration: float, *, model: Optional[s native_finish_reason=finish_reason, tool_calls=tool_calls, duration=duration, - metadata={} + metadata={ + **({"reasoning": reasoning} if reasoning else {}), + } ) def get_metadata(self) -> ProviderMetadata: diff --git a/spoon_ai/llm/providers/openai_compatible_provider.py b/spoon_ai/llm/providers/openai_compatible_provider.py index a717027..85ae050 100644 --- a/spoon_ai/llm/providers/openai_compatible_provider.py +++ b/spoon_ai/llm/providers/openai_compatible_provider.py @@ -55,16 +55,25 @@ def _uses_completion_token_param(self, model: str) -> bool: tail = model_lower.split("/")[-1] # strip any provider prefix like openrouter return tail.startswith("gpt-5") or tail.startswith("o1") or tail.startswith("o3") or tail.startswith("o4") - def _supports_temperature(self, model: str) -> bool: + def _supports_temperature(self, model: str, reasoning_effort: str | None = None) -> bool: """Whether this model supports custom temperature values. - Some newer OpenAI models (gpt-5.1*, o1*, o3*, o4*) only support the - default temperature value (1.0) and will error on other values. + GPT-5 reasoning models reject explicit temperature settings more often + than the legacy chat-completions family: + - `gpt-5` and size-suffixed variants like `gpt-5-mini`: no temperature support + - versioned GPT-5 models like `gpt-5.4`: temperature only when reasoning is disabled + - `o*`: no temperature support """ model_lower = (model or "").lower() tail = model_lower.split("/")[-1] # strip any provider prefix - # gpt-5.1 and reasoning models don't support custom temperature - return not (tail.startswith("gpt-5.1") or tail.startswith("o1") or tail.startswith("o3") or tail.startswith("o4")) + if tail.startswith("o1") or tail.startswith("o3") or tail.startswith("o4"): + return False + if tail.startswith("gpt-5."): + normalized_effort = str(reasoning_effort or "").strip().lower() + return normalized_effort in {"", "none"} + if tail == "gpt-5" or tail.startswith("gpt-5-"): + return False + return True def _max_token_kwargs(self, model: str, max_tokens: int, overrides: Dict[str, Any]) -> Dict[str, Any]: """ @@ -833,7 +842,7 @@ async def chat(self, messages: List[Message], **kwargs) -> LLMResponse: "stream": False, } # Only add temperature for models that support it - if self._supports_temperature(model): + if self._supports_temperature(model, kwargs.get("reasoning_effort")): request_kwargs["temperature"] = temperature request_kwargs.update(self._max_token_kwargs(model, max_tokens, kwargs)) @@ -841,7 +850,10 @@ async def chat(self, messages: List[Message], **kwargs) -> LLMResponse: request_kwargs["tools"] = tools request_kwargs["tool_choice"] = tool_choice - extra_keys = {'model', 'max_tokens', 'max_completion_tokens', 'temperature', 'thinking', 'tools', 'tool_choice'} + extra_keys = { + 'model', 'max_tokens', 'max_completion_tokens', 'temperature', + 'thinking', 'reasoning_effort', 'tools', 'tool_choice' + } request_kwargs.update({k: v for k, v in kwargs.items() if k not in extra_keys}) self._apply_reasoning_defaults(request_kwargs, kwargs) @@ -893,7 +905,7 @@ async def chat_stream(self,messages: List[Message],callbacks: Optional[List[Base "stream_options": {"include_usage": True}, } # Only add temperature for models that support it - if 
self._supports_temperature(model): + if self._supports_temperature(model, kwargs.get("reasoning_effort")): request_kwargs["temperature"] = temperature request_kwargs.update(self._max_token_kwargs(model, max_tokens, kwargs)) @@ -901,7 +913,10 @@ async def chat_stream(self,messages: List[Message],callbacks: Optional[List[Base request_kwargs["tools"] = tools request_kwargs["tool_choice"] = tool_choice - extra_keys = {'model', 'max_tokens', 'max_completion_tokens', 'temperature', 'thinking', 'callbacks', 'tools', 'tool_choice'} + extra_keys = { + 'model', 'max_tokens', 'max_completion_tokens', 'temperature', + 'thinking', 'reasoning_effort', 'callbacks', 'tools', 'tool_choice' + } request_kwargs.update({k: v for k, v in kwargs.items() if k not in extra_keys}) self._apply_reasoning_defaults(request_kwargs, kwargs) @@ -1104,7 +1119,7 @@ async def chat_with_tools(self, messages: List[Message], tools: List[Dict], **kw "messages": openai_messages, } # Only add temperature for models that support it - if self._supports_temperature(model): + if self._supports_temperature(model, kwargs.get("reasoning_effort")): request_kwargs["temperature"] = temperature request_kwargs.update(self._max_token_kwargs(model, max_tokens, kwargs)) @@ -1112,7 +1127,10 @@ async def chat_with_tools(self, messages: List[Message], tools: List[Dict], **kw request_kwargs["tools"] = tools request_kwargs["tool_choice"] = tool_choice - extra_keys = {'model', 'max_tokens', 'max_completion_tokens', 'temperature', 'thinking', 'tool_choice', 'output_queue'} + extra_keys = { + 'model', 'max_tokens', 'max_completion_tokens', 'temperature', + 'thinking', 'reasoning_effort', 'tool_choice', 'output_queue' + } request_kwargs.update({k: v for k, v in kwargs.items() if k not in extra_keys}) self._apply_reasoning_defaults(request_kwargs, kwargs) diff --git a/spoon_ai/llm/providers/openai_provider.py b/spoon_ai/llm/providers/openai_provider.py index 34dfc45..4770e20 100644 --- a/spoon_ai/llm/providers/openai_provider.py +++ b/spoon_ai/llm/providers/openai_provider.py @@ -2,10 +2,22 @@ OpenAI Provider implementation for the unified LLM interface. 
""" -from typing import Dict, Any +from __future__ import annotations + +import asyncio +import json +from datetime import datetime from logging import getLogger +from typing import Any, AsyncIterator, Dict, List, Optional +from uuid import uuid4 + +from spoon_ai.callbacks.base import BaseCallbackHandler +from spoon_ai.callbacks.manager import CallbackManager +from spoon_ai.schema import LLMResponseChunk, Message, ToolCall, Function +from spoon_ai.utils.streaming import build_output_queue_event -from ..interface import ProviderMetadata, ProviderCapability +from ..errors import ProviderError +from ..interface import LLMResponse, ProviderMetadata, ProviderCapability from ..registry import register_provider from .openai_compatible_provider import OpenAICompatibleProvider @@ -20,13 +32,574 @@ ]) class OpenAIProvider(OpenAICompatibleProvider): """OpenAI provider implementation.""" - + def __init__(self): super().__init__() self.provider_name = "openai" self.default_base_url = "https://api.openai.com/v1" self.default_model = "gpt-4.1" - + + @staticmethod + def _stringify_responses_content(content: Any) -> str: + if isinstance(content, str): + return content + if content is None: + return "" + if isinstance(content, (dict, list)): + try: + return json.dumps(content, ensure_ascii=False) + except TypeError: + return str(content) + return str(content) + + def _supports_responses_reasoning( + self, + messages: List[Message], + tools: List[Dict[str, Any]] | None, + kwargs: Dict[str, Any], + ) -> bool: + if not (kwargs.get("thinking") or kwargs.get("reasoning_effort")): + return False + try: + openai_messages = self._convert_messages(messages) + self._convert_messages_to_responses_input(openai_messages) + if tools: + self._convert_tools_to_responses(tools) + self._convert_tool_choice_to_responses(kwargs.get("tool_choice", "auto")) + except ValueError: + return False + return True + + def _convert_messages_to_responses_input( + self, + messages: List[Dict[str, Any]], + ) -> List[Dict[str, Any]]: + input_items: List[Dict[str, Any]] = [] + + for message in messages: + role = str(message.get("role") or "") + content = message.get("content") + tool_calls = message.get("tool_calls") or [] + + if role == "tool": + tool_call_id = message.get("tool_call_id") + if not tool_call_id: + raise ValueError("Responses API requires tool_call_id for tool outputs") + input_items.append( + { + "type": "function_call_output", + "call_id": tool_call_id, + "output": self._stringify_responses_content(content), + } + ) + continue + + if content is not None: + if not isinstance(content, str): + raise ValueError("Responses reasoning path currently supports text-only messages") + input_items.append( + { + "type": "message", + "role": role, + "content": content, + } + ) + + if role == "assistant" and tool_calls: + for tool_call in tool_calls: + function_payload = tool_call.get("function") or {} + input_items.append( + { + "type": "function_call", + "call_id": tool_call.get("id"), + "name": function_payload.get("name") or "unknown", + "arguments": function_payload.get("arguments") or "{}", + } + ) + + return input_items + + @staticmethod + def _convert_tools_to_responses(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + converted: List[Dict[str, Any]] = [] + for tool in tools: + if tool.get("type") != "function": + raise ValueError("Responses reasoning path currently supports function tools only") + function_payload = tool.get("function") or {} + converted.append( + { + "type": "function", + "name": 
function_payload.get("name") or "unknown", + "description": function_payload.get("description"), + "parameters": function_payload.get("parameters") or {"type": "object", "properties": {}}, + } + ) + return converted + + @staticmethod + def _convert_tool_choice_to_responses(tool_choice: Any) -> Any: + if tool_choice in {None, "auto", "required", "none"}: + return tool_choice or "auto" + if isinstance(tool_choice, dict): + if tool_choice.get("type") == "function": + function_payload = tool_choice.get("function") or {} + name = function_payload.get("name") or tool_choice.get("name") + if name: + return {"type": "function", "name": name} + choice_type = tool_choice.get("type") + if choice_type in {"auto", "required", "none"}: + return choice_type + return tool_choice + + def _build_responses_reasoning(self, kwargs: Dict[str, Any]) -> Dict[str, Any] | None: + if not (kwargs.get("thinking") or kwargs.get("reasoning_effort")): + return None + + reasoning: Dict[str, Any] = {"summary": "detailed"} + effort = kwargs.get("reasoning_effort") + if effort: + reasoning["effort"] = effort + return reasoning + + def _build_responses_request_kwargs( + self, + messages: List[Message], + kwargs: Dict[str, Any], + *, + tools: List[Dict[str, Any]] | None = None, + stream: bool, + ) -> Dict[str, Any]: + openai_messages = self._convert_messages(messages) + model = kwargs.get("model", self.model) + max_tokens = kwargs.get("max_completion_tokens", kwargs.get("max_tokens", self.max_tokens)) + temperature = kwargs.get("temperature", self.temperature) + + request_kwargs: Dict[str, Any] = { + "model": model, + "input": self._convert_messages_to_responses_input(openai_messages), + "stream": stream, + "max_output_tokens": max_tokens, + } + if self._supports_temperature(model, kwargs.get("reasoning_effort")): + request_kwargs["temperature"] = temperature + + reasoning = self._build_responses_reasoning(kwargs) + if reasoning: + request_kwargs["reasoning"] = reasoning + + if tools: + request_kwargs["tools"] = self._convert_tools_to_responses(tools) + request_kwargs["tool_choice"] = self._convert_tool_choice_to_responses( + kwargs.get("tool_choice", "auto") + ) + + extra_keys = { + "model", + "max_tokens", + "max_completion_tokens", + "temperature", + "thinking", + "reasoning_effort", + "tools", + "tool_choice", + "callbacks", + "output_queue", + } + request_kwargs.update({k: v for k, v in kwargs.items() if k not in extra_keys}) + return request_kwargs + + @staticmethod + def _extract_responses_text(response: Any) -> str: + direct = getattr(response, "output_text", None) + if isinstance(direct, str) and direct: + return direct + + texts: List[str] = [] + for item in getattr(response, "output", []) or []: + if getattr(item, "type", None) != "message": + continue + for content_part in getattr(item, "content", []) or []: + if getattr(content_part, "type", None) == "output_text": + text = getattr(content_part, "text", None) + if text: + texts.append(str(text)) + return "".join(texts) + + @staticmethod + def _extract_responses_reasoning(response: Any) -> str: + parts: List[str] = [] + for item in getattr(response, "output", []) or []: + if getattr(item, "type", None) != "reasoning": + continue + for summary_item in getattr(item, "summary", []) or []: + text = getattr(summary_item, "text", None) + if text: + parts.append(str(text)) + return "\n\n".join(parts) + + @staticmethod + def _extract_responses_tool_calls(response: Any) -> List[ToolCall]: + tool_calls: List[ToolCall] = [] + for item in getattr(response, "output", []) or []: 
+ if getattr(item, "type", None) != "function_call": + continue + tool_calls.append( + ToolCall( + id=getattr(item, "call_id", None) or getattr(item, "id", ""), + type="function", + function=Function( + name=getattr(item, "name", None) or "unknown", + arguments=getattr(item, "arguments", None) or "{}", + ), + ) + ) + return tool_calls + + @staticmethod + def _responses_finish_reason(response: Any, tool_calls: List[ToolCall]) -> str: + if tool_calls: + return "tool_calls" + incomplete = getattr(response, "incomplete_details", None) + reason = getattr(incomplete, "reason", None) + if reason == "max_output_tokens": + return "length" + if reason == "content_filter": + return "content_filter" + return "stop" + + @staticmethod + def _responses_usage(response: Any) -> Dict[str, int] | None: + usage = getattr(response, "usage", None) + if usage is None: + return None + prompt_tokens = getattr(usage, "input_tokens", None) + completion_tokens = getattr(usage, "output_tokens", None) + total_tokens = getattr(usage, "total_tokens", None) + if prompt_tokens is None and completion_tokens is None and total_tokens is None: + return None + return { + "prompt_tokens": prompt_tokens or 0, + "completion_tokens": completion_tokens or 0, + "total_tokens": total_tokens or 0, + } + + def _convert_responses_response(self, response: Any, duration: float) -> LLMResponse: + tool_calls = self._extract_responses_tool_calls(response) + reasoning_text = self._extract_responses_reasoning(response) + finish_reason = self._responses_finish_reason(response, tool_calls) + metadata = { + "response_id": getattr(response, "id", ""), + "created": getattr(response, "created_at", None), + } + if reasoning_text: + metadata["reasoning"] = reasoning_text + + return LLMResponse( + content=self._extract_responses_text(response), + provider=self.get_provider_name(), + model=getattr(response, "model", self.model), + finish_reason=finish_reason, + native_finish_reason=finish_reason, + tool_calls=tool_calls, + usage=self._responses_usage(response), + duration=duration, + metadata=metadata, + ) + + async def chat(self, messages: List[Message], **kwargs) -> LLMResponse: + if not self.client or not self._supports_responses_reasoning(messages, None, kwargs): + return await super().chat(messages, **kwargs) + + start_time = asyncio.get_event_loop().time() + response = await self.client.responses.create( + **self._build_responses_request_kwargs(messages, kwargs, stream=False) + ) + duration = asyncio.get_event_loop().time() - start_time + return self._convert_responses_response(response, duration) + + def _build_responses_stream_chunk( + self, + *, + content: str, + delta: str, + model: str, + finish_reason: str | None, + tool_calls: List[ToolCall], + tool_call_chunks: List[Dict[str, Any]] | None = None, + usage: Dict[str, int] | None = None, + metadata: Dict[str, Any] | None = None, + chunk_index: int = 0, + ) -> LLMResponseChunk: + return LLMResponseChunk( + content=content, + delta=delta, + provider=self.get_provider_name(), + model=model, + finish_reason=finish_reason, + tool_calls=tool_calls, + tool_call_chunks=tool_call_chunks, + usage=usage, + metadata=metadata or {}, + chunk_index=chunk_index, + timestamp=datetime.now().isoformat(), + ) + + async def chat_stream( + self, + messages: List[Message], + callbacks: Optional[List[BaseCallbackHandler]] = None, + **kwargs, + ) -> AsyncIterator[LLMResponseChunk]: + tools = kwargs.get("tools") + if not self.client or not self._supports_responses_reasoning(messages, tools, kwargs): + async for chunk 
in super().chat_stream(messages, callbacks=callbacks, **kwargs): + yield chunk + return + + callback_manager = CallbackManager.from_callbacks(callbacks) + run_id = uuid4() + model = kwargs.get("model", self.model) + start_time = asyncio.get_event_loop().time() + + await callback_manager.on_llm_start( + run_id=run_id, + messages=messages, + model=model, + provider=self.get_provider_name(), + ) + + try: + request_kwargs = self._build_responses_request_kwargs( + messages, + kwargs, + tools=tools, + stream=True, + ) + stream = await self.client.responses.create(**request_kwargs) + + full_content = "" + full_reasoning = "" + chunk_index = 0 + tool_calls: List[ToolCall] = [] + latest_response = None + + async for event in stream: + event_type = getattr(event, "type", None) + if event_type == "response.reasoning_summary_text.delta": + delta = getattr(event, "delta", "") or "" + if not delta: + continue + full_reasoning += delta + yield self._build_responses_stream_chunk( + content=full_reasoning, + delta=delta, + model=model, + finish_reason=None, + tool_calls=tool_calls, + metadata={ + "type": "thinking", + "phase": "think", + "provider": self.get_provider_name(), + "channel": "thinking", + }, + chunk_index=chunk_index, + ) + chunk_index += 1 + continue + + if event_type == "response.output_text.delta": + delta = getattr(event, "delta", "") or "" + if not delta: + continue + full_content += delta + response_chunk = self._build_responses_stream_chunk( + content=full_content, + delta=delta, + model=model, + finish_reason=None, + tool_calls=tool_calls, + metadata={ + "provider": self.get_provider_name(), + "channel": "text", + }, + chunk_index=chunk_index, + ) + await callback_manager.on_llm_new_token( + token=delta, + chunk=response_chunk, + run_id=run_id, + ) + yield response_chunk + chunk_index += 1 + continue + + if event_type == "response.output_item.done": + item = getattr(event, "item", None) + if getattr(item, "type", None) != "function_call": + continue + tool_calls = [ + *tool_calls, + ToolCall( + id=getattr(item, "call_id", None) or getattr(item, "id", ""), + type="function", + function=Function( + name=getattr(item, "name", None) or "unknown", + arguments=getattr(item, "arguments", None) or "{}", + ), + ), + ] + yield self._build_responses_stream_chunk( + content=full_content, + delta="", + model=model, + finish_reason=None, + tool_calls=tool_calls, + tool_call_chunks=[ + { + "index": getattr(event, "output_index", len(tool_calls) - 1), + "id": tool_calls[-1].id, + "type": "function", + "function": { + "name": tool_calls[-1].function.name, + "arguments": tool_calls[-1].function.arguments, + }, + } + ], + metadata={ + "provider": self.get_provider_name(), + "channel": "tool", + }, + chunk_index=chunk_index, + ) + chunk_index += 1 + continue + + if event_type == "response.completed": + latest_response = getattr(event, "response", None) + + if latest_response is None: + raise RuntimeError("OpenAI Responses stream completed without a final response") + + final_content = self._extract_responses_text(latest_response) + if final_content and len(final_content) >= len(full_content): + full_content = final_content + + final_tool_calls = self._extract_responses_tool_calls(latest_response) or tool_calls + finish_reason = self._responses_finish_reason(latest_response, final_tool_calls) + usage = self._responses_usage(latest_response) + final_metadata: Dict[str, Any] = { + "response_id": getattr(latest_response, "id", ""), + "created": getattr(latest_response, "created_at", None), + } + if 
full_reasoning: + final_metadata["reasoning"] = full_reasoning + + final_chunk = self._build_responses_stream_chunk( + content=full_content, + delta="", + model=getattr(latest_response, "model", model), + finish_reason=finish_reason, + tool_calls=final_tool_calls, + usage=usage, + metadata=final_metadata, + chunk_index=chunk_index, + ) + yield final_chunk + + duration = asyncio.get_event_loop().time() - start_time + await callback_manager.on_llm_end( + response=LLMResponse( + content=full_content, + provider=self.get_provider_name(), + model=getattr(latest_response, "model", model), + finish_reason=finish_reason, + native_finish_reason=finish_reason, + tool_calls=final_tool_calls, + usage=usage, + duration=duration, + metadata=final_metadata, + ), + run_id=run_id, + ) + except Exception as e: + await callback_manager.on_llm_error( + error=e, + run_id=run_id, + ) + await self._handle_error(e) + + async def chat_with_tools(self, messages: List[Message], tools: List[Dict], **kwargs) -> LLMResponse: + if not self.client or not self._supports_responses_reasoning(messages, tools, kwargs): + return await super().chat_with_tools(messages, tools, **kwargs) + + start_time = asyncio.get_event_loop().time() + output_queue = kwargs.get("output_queue") + request_kwargs = self._build_responses_request_kwargs( + messages, + kwargs, + tools=tools, + stream=output_queue is not None, + ) + + if output_queue is None: + response = await self.client.responses.create(**request_kwargs) + duration = asyncio.get_event_loop().time() - start_time + return self._convert_responses_response(response, duration) + + stream = await self.client.responses.create(**request_kwargs) + latest_response = None + full_reasoning = "" + + async for event in stream: + event_type = getattr(event, "type", None) + if event_type == "response.reasoning_summary_text.delta": + delta = getattr(event, "delta", "") or "" + if delta: + full_reasoning += delta + try: + output_queue.put_nowait( + build_output_queue_event( + event_type="thinking", + delta=delta, + metadata={ + "phase": "think", + "provider": self.get_provider_name(), + "channel": "thinking", + }, + ) + ) + except Exception: + pass + elif event_type == "response.output_text.delta": + delta = getattr(event, "delta", "") or "" + if delta: + try: + output_queue.put_nowait( + build_output_queue_event( + event_type="content", + delta=delta, + metadata={ + "provider": self.get_provider_name(), + "channel": "text", + }, + ) + ) + except Exception: + pass + elif event_type == "response.completed": + latest_response = getattr(event, "response", None) + + if latest_response is None: + raise RuntimeError("OpenAI Responses stream completed without a final response") + + duration = asyncio.get_event_loop().time() - start_time + result = self._convert_responses_response(latest_response, duration) + if full_reasoning: + result.metadata["reasoning"] = full_reasoning + result.metadata["streamed_content"] = bool(result.content) + result.metadata["stream_chunk_count"] = 0 + return result + def get_metadata(self) -> ProviderMetadata: """Get OpenAI provider metadata.""" return ProviderMetadata( @@ -44,4 +617,4 @@ def get_metadata(self) -> ProviderMetadata: "requests_per_minute": 3500, "tokens_per_minute": 90000 } - ) \ No newline at end of file + ) diff --git a/spoon_ai/schema.py b/spoon_ai/schema.py index c614f64..be581b9 100644 --- a/spoon_ai/schema.py +++ b/spoon_ai/schema.py @@ -52,6 +52,10 @@ class ToolCall(BaseModel): id: str type: str = "function" function: Function + metadata: Optional[dict[str, 
Any]] = Field( + default=None, + description="Optional provider-specific metadata for preserving tool-call context", + ) class AgentState(str, Enum): """ diff --git a/tests/test_agent_llm_integration.py b/tests/test_agent_llm_integration.py index 98e2d3d..00bf39f 100644 --- a/tests/test_agent_llm_integration.py +++ b/tests/test_agent_llm_integration.py @@ -191,6 +191,33 @@ async def test_toolcall_agent_with_tools(self, mock_chatbot_manager, tool_manage tool_input={"param": "value"} ) assert "Tool executed successfully" in result + + @pytest.mark.asyncio + async def test_toolcall_agent_preserves_tool_call_metadata_in_memory(self, mock_chatbot_manager, tool_manager): + agent = ToolCallAgent( + name="test_agent", + llm=mock_chatbot_manager, + available_tools=tool_manager, + ) + + await agent.add_message( + "assistant", + "I'll use a tool.", + tool_calls=[ + ToolCall( + id="call_sig", + type="function", + function=Function( + name="test_tool", + arguments='{"param":"value"}', + ), + metadata={"thought_signature": "c2lnLTEyMw=="}, + ) + ], + ) + + stored = agent.memory.messages[-1].tool_calls[0] + assert stored.metadata == {"thought_signature": "c2lnLTEyMw=="} @pytest.mark.asyncio async def test_spoon_react_ai_initialization(self): @@ -206,9 +233,11 @@ async def test_spoon_react_ai_initialization(self): # Verify it uses the new architecture assert agent.llm.use_llm_manager is True - def test_spoon_react_run_signatures_accept_thinking(self): + def test_spoon_react_run_signatures_accept_reasoning_kwargs(self): assert "thinking" in inspect.signature(SpoonReactAI.run).parameters assert "thinking" in inspect.signature(SpoonReactSkill.run).parameters + assert "reasoning_effort" in inspect.signature(SpoonReactAI.run).parameters + assert "reasoning_effort" in inspect.signature(SpoonReactSkill.run).parameters @pytest.mark.asyncio async def test_spoon_react_ai_fallback_to_legacy(self): @@ -328,7 +357,7 @@ async def test_streamed_tool_response_does_not_enqueue_full_content_twice(self, assert all(call.args != ({"content": "already streamed full text"},) for call in put_calls) @pytest.mark.asyncio - async def test_toolcall_agent_emits_explicit_thinking_for_non_streamed_pre_tool_content(self, mock_chatbot_manager, tool_manager): + async def test_toolcall_agent_emits_progress_content_for_non_streamed_pre_tool_content(self, mock_chatbot_manager, tool_manager): mock_tool_call = ToolCall( id="call_123", type="function", @@ -361,11 +390,11 @@ async def test_toolcall_agent_emits_explicit_thinking_for_non_streamed_pre_tool_ assert should_continue is True put_calls = mock_queue.put_nowait.call_args_list assert put_calls[0].args[0] == { - "type": "thinking", + "type": "content", "delta": "First I will inspect the workspace.", "content": "First I will inspect the workspace.", "metadata": { - "phase": "think", + "phase": "progress", "source": "toolcall_agent", }, } diff --git a/tests/test_tool_streaming_output.py b/tests/test_tool_streaming_output.py index 1b78df9..496f20b 100644 --- a/tests/test_tool_streaming_output.py +++ b/tests/test_tool_streaming_output.py @@ -16,6 +16,7 @@ from spoon_ai.llm.providers.gemini_provider import GeminiProvider from spoon_ai.llm.providers.ollama_provider import OllamaProvider from spoon_ai.llm.providers.openai_compatible_provider import OpenAICompatibleProvider +from spoon_ai.llm.providers.openai_provider import OpenAIProvider from spoon_ai.llm.providers.openrouter_provider import OpenRouterProvider @@ -187,6 +188,34 @@ async def 
test_chatbot_ask_tool_normalizes_anthropic_reasoning_effort(): assert call.kwargs["output_config"] == {"effort": "high"} +@pytest.mark.asyncio +async def test_chatbot_ask_tool_enables_anthropic_adaptive_thinking_from_reasoning_effort_alone(): + mock_manager = SimpleNamespace(chat_with_tools=AsyncMock()) + mock_manager.chat_with_tools.return_value = LLMResponse( + content="ok", + provider="anthropic", + model="claude-sonnet-4.6", + finish_reason="stop", + native_finish_reason="stop", + ) + + with patch("spoon_ai.chat.get_llm_manager", return_value=mock_manager): + bot = ChatBot( + use_llm_manager=True, + llm_provider="anthropic", + model_name="claude-sonnet-4.6", + ) + await bot.ask_tool( + [{"role": "user", "content": "hi"}], + tools=_tool_spec(), + reasoning_effort="high", + ) + + call = mock_manager.chat_with_tools.call_args + assert call.kwargs["thinking"] == {"type": "adaptive"} + assert call.kwargs["output_config"] == {"effort": "high"} + + @pytest.mark.asyncio async def test_chatbot_ask_tool_maps_anthropic_xhigh_to_max_for_opus_46(): mock_manager = SimpleNamespace(chat_with_tools=AsyncMock()) @@ -358,6 +387,252 @@ async def test_openai_chat_with_tools_streams_deltas_to_output_queue(): assert response.metadata.get("streamed_content") is True +def test_openai_supports_temperature_for_gpt_54_only_without_reasoning(): + provider = OpenAICompatibleProvider() + + assert provider._supports_temperature("gpt-5.4", reasoning_effort="none") is True + assert provider._supports_temperature("gpt-5.4", reasoning_effort=None) is True + assert provider._supports_temperature("gpt-5.4", reasoning_effort="high") is False + assert provider._supports_temperature("gpt-5.4", reasoning_effort="medium") is False + + +@pytest.mark.asyncio +async def test_openai_chat_with_tools_uses_responses_reasoning_summary_when_effort_requested(): + provider = OpenAIProvider() + provider.model = "gpt-5.4" + + completed_response = SimpleNamespace( + id="resp_123", + created_at=123.0, + model="gpt-5.4", + output=[ + SimpleNamespace( + type="reasoning", + summary=[ + SimpleNamespace( + text="Plan: inspect the latest game state before choosing a move." 
+ ) + ], + ), + SimpleNamespace( + type="function_call", + id="fc_123", + call_id="call_123", + name="echo_tool", + arguments='{"text":"hello"}', + ), + ], + usage=SimpleNamespace( + input_tokens=10, + output_tokens=7, + total_tokens=17, + ), + ) + stream_items = [ + SimpleNamespace( + type="response.reasoning_summary_text.delta", + delta="Plan: inspect the latest game state before choosing a move.", + output_index=0, + summary_index=0, + item_id="rs_123", + ), + SimpleNamespace( + type="response.output_item.done", + output_index=1, + item=SimpleNamespace( + type="function_call", + id="fc_123", + call_id="call_123", + name="echo_tool", + arguments='{"text":"hello"}', + ), + ), + SimpleNamespace( + type="response.completed", + response=completed_response, + ), + ] + provider.client = SimpleNamespace( + responses=SimpleNamespace(create=AsyncMock(return_value=_AsyncItems(stream_items))), + chat=SimpleNamespace( + completions=SimpleNamespace(create=AsyncMock()), + ), + ) + + q: asyncio.Queue = asyncio.Queue() + response = await provider.chat_with_tools( + messages=[Message(role="user", content="hi")], + tools=_tool_spec(), + output_queue=q, + reasoning_effort="high", + ) + + streamed_events: list[dict] = [] + while not q.empty(): + streamed_events.append(await q.get()) + + assert streamed_events == [ + { + "type": "thinking", + "delta": "Plan: inspect the latest game state before choosing a move.", + "content": "Plan: inspect the latest game state before choosing a move.", + "metadata": { + "phase": "think", + "provider": "openai", + "channel": "thinking", + }, + } + ] + assert response.content == "" + assert response.tool_calls[0].id == "call_123" + assert response.tool_calls[0].function.name == "echo_tool" + assert response.tool_calls[0].function.arguments == '{"text":"hello"}' + assert response.metadata["reasoning"] == ( + "Plan: inspect the latest game state before choosing a move." + ) + assert provider.client.responses.create.await_count == 1 + assert provider.client.chat.completions.create.await_count == 0 + request_kwargs = provider.client.responses.create.await_args.kwargs + assert request_kwargs["reasoning"] == {"effort": "high", "summary": "detailed"} + assert "temperature" not in request_kwargs + assert request_kwargs["tools"] == [ + { + "type": "function", + "name": "echo_tool", + "description": "Echo text", + "parameters": { + "type": "object", + "properties": {"text": {"type": "string"}}, + "required": ["text"], + }, + } + ] + + +@pytest.mark.asyncio +async def test_openai_chat_stream_uses_responses_reasoning_summary_when_effort_requested(): + provider = OpenAIProvider() + provider.model = "gpt-5.4" + + completed_response = SimpleNamespace( + id="resp_stream_123", + created_at=456.0, + model="gpt-5.4", + output=[ + SimpleNamespace( + type="reasoning", + summary=[ + SimpleNamespace( + text="Plan: inspect the wallet before attempting to join." 
+ ) + ], + ), + SimpleNamespace( + type="function_call", + id="fc_stream_123", + call_id="call_stream_123", + name="echo_tool", + arguments='{"text":"hello"}', + ), + ], + usage=SimpleNamespace( + input_tokens=12, + output_tokens=8, + total_tokens=20, + ), + ) + stream_items = [ + SimpleNamespace( + type="response.reasoning_summary_text.delta", + delta="Plan: inspect the wallet before attempting to join.", + output_index=0, + summary_index=0, + item_id="rs_stream_123", + ), + SimpleNamespace( + type="response.output_text.delta", + delta="Wallet looks ready.", + ), + SimpleNamespace( + type="response.output_item.done", + output_index=1, + item=SimpleNamespace( + type="function_call", + id="fc_stream_123", + call_id="call_stream_123", + name="echo_tool", + arguments='{"text":"hello"}', + ), + ), + SimpleNamespace( + type="response.completed", + response=completed_response, + ), + ] + provider.client = SimpleNamespace( + responses=SimpleNamespace(create=AsyncMock(return_value=_AsyncItems(stream_items))), + chat=SimpleNamespace( + completions=SimpleNamespace(create=AsyncMock()), + ), + ) + + chunks = [ + chunk + async for chunk in provider.chat_stream( + messages=[Message(role="user", content="hi")], + tools=_tool_spec(), + reasoning_effort="high", + ) + ] + + assert chunks[0].delta == "Plan: inspect the wallet before attempting to join." + assert chunks[0].metadata == { + "type": "thinking", + "phase": "think", + "provider": "openai", + "channel": "thinking", + } + assert chunks[1].delta == "Wallet looks ready." + assert chunks[1].metadata == { + "provider": "openai", + "channel": "text", + } + assert chunks[2].tool_calls[0].id == "call_stream_123" + assert chunks[2].tool_calls[0].function.name == "echo_tool" + assert chunks[2].tool_call_chunks == [ + { + "index": 1, + "id": "call_stream_123", + "type": "function", + "function": { + "name": "echo_tool", + "arguments": '{"text":"hello"}', + }, + } + ] + assert chunks[-1].finish_reason == "tool_calls" + assert chunks[-1].metadata["reasoning"] == ( + "Plan: inspect the wallet before attempting to join." 
+ ) + assert provider.client.responses.create.await_count == 1 + assert provider.client.chat.completions.create.await_count == 0 + request_kwargs = provider.client.responses.create.await_args.kwargs + assert request_kwargs["reasoning"] == {"effort": "high", "summary": "detailed"} + assert "temperature" not in request_kwargs + assert request_kwargs["tools"] == [ + { + "type": "function", + "name": "echo_tool", + "description": "Echo text", + "parameters": { + "type": "object", + "properties": {"text": {"type": "string"}}, + "required": ["text"], + }, + } + ] + + @pytest.mark.asyncio async def test_openrouter_chat_with_tools_streams_reasoning_to_output_queue(): provider = OpenRouterProvider() @@ -478,6 +753,43 @@ async def test_openrouter_chat_with_tools_maps_reasoning_effort_to_extra_body(): assert provider.client.chat.completions.create.call_args.kwargs["extra_body"]["reasoning"] == { "effort": "high" } + assert "reasoning_effort" not in provider.client.chat.completions.create.call_args.kwargs + + +@pytest.mark.asyncio +async def test_openai_compatible_chat_with_tools_drops_top_level_reasoning_effort_for_non_openrouter_requests(): + provider = OpenAICompatibleProvider() + provider.model = "gpt-4.1" + provider.client = SimpleNamespace( + chat=SimpleNamespace( + completions=SimpleNamespace( + create=AsyncMock( + return_value=SimpleNamespace( + id="resp_456", + choices=[ + SimpleNamespace( + message=SimpleNamespace(content="done", tool_calls=None), + finish_reason="stop", + ) + ], + created=456, + usage=None, + model="gpt-4.1", + ) + ) + ) + ) + ) + + await provider.chat_with_tools( + messages=[Message(role="user", content="hi")], + tools=_tool_spec(), + reasoning_effort="high", + ) + + request_kwargs = provider.client.chat.completions.create.call_args.kwargs + assert "reasoning_effort" not in request_kwargs + assert "extra_body" not in request_kwargs @pytest.mark.asyncio @@ -664,6 +976,44 @@ def _stream(**request_kwargs): assert response.content == "ok" +@pytest.mark.asyncio +async def test_anthropic_chat_with_tools_strips_temperature_top_k_and_forced_tool_choice_when_thinking_enabled(): + provider = AnthropicProvider() + provider.model = "claude-opus-4.7" + + captured_kwargs: dict = {} + chunks = [ + SimpleNamespace(type="content_block_start", content_block=SimpleNamespace(type="text")), + SimpleNamespace(type="content_block_delta", delta=SimpleNamespace(type="text_delta", text="ok")), + SimpleNamespace(type="content_block_stop"), + SimpleNamespace(type="message_delta", delta=SimpleNamespace(stop_reason="end_turn")), + SimpleNamespace(type="message_stop", message=SimpleNamespace(stop_reason="end_turn")), + ] + + def _stream(**request_kwargs): + captured_kwargs.update(request_kwargs) + return _AsyncStreamContext(chunks) + + provider.client = SimpleNamespace( + messages=SimpleNamespace(stream=_stream) + ) + + response = await provider.chat_with_tools( + messages=[Message(role="user", content="hi")], + tools=_tool_spec(), + thinking=True, + temperature=0.7, + top_k=5, + tool_choice="required", + ) + + assert captured_kwargs["thinking"] == {"type": "adaptive"} + assert "temperature" not in captured_kwargs + assert "top_k" not in captured_kwargs + assert "tool_choice" not in captured_kwargs + assert response.content == "ok" + + @pytest.mark.asyncio async def test_gemini_chat_with_tools_streams_deltas_to_output_queue(): provider = GeminiProvider() @@ -720,6 +1070,87 @@ def close(self): assert response.metadata.get("streamed_content") is True +@pytest.mark.asyncio +async def 
+    provider = GeminiProvider()
+    provider.api_key = "test-key"
+    provider.model = "gemini-2.5-pro"
+
+    signature = b"sig-123"
+    responses = [
+        SimpleNamespace(
+            candidates=[
+                SimpleNamespace(
+                    content=SimpleNamespace(
+                        parts=[
+                            SimpleNamespace(text="Plan: inspect files.", thought=True),
+                            SimpleNamespace(
+                                function_call=SimpleNamespace(
+                                    name="get_weather",
+                                    args={"city": "Paris"},
+                                ),
+                                thought_signature=signature,
+                            ),
+                        ]
+                    ),
+                    finish_reason="STOP",
+                )
+            ]
+        ),
+    ]
+
+    class _FakeGeminiClient:
+        captured_config = None
+
+        def __init__(self, api_key: str):
+            self.aio = SimpleNamespace(
+                models=SimpleNamespace(
+                    generate_content_stream=AsyncMock(side_effect=self._generate_content_stream)
+                ),
+                aclose=AsyncMock(return_value=None),
+            )
+
+        async def _generate_content_stream(self, *, config, **kwargs):
+            type(self).captured_config = config
+            return _AsyncItems(responses)
+
+        def close(self):
+            return None
+
+    with patch("spoon_ai.llm.providers.gemini_provider.genai.Client", _FakeGeminiClient):
+        q: asyncio.Queue = asyncio.Queue()
+        response = await provider.chat_with_tools(
+            messages=[Message(role="user", content="hi")],
+            tools=_tool_spec(),
+            output_queue=q,
+            thinking_budget=128,
+        )
+
+    streamed_events: list[dict] = []
+    while not q.empty():
+        streamed_events.append(await q.get())
+
+    assert streamed_events == [
+        {
+            "type": "thinking",
+            "delta": "Plan: inspect files.",
+            "content": "Plan: inspect files.",
+            "metadata": {
+                "phase": "think",
+                "provider": "gemini",
+                "channel": "thinking",
+            },
+        }
+    ]
+    assert _FakeGeminiClient.captured_config.thinking_config.include_thoughts is True
+    assert response.finish_reason == "tool_calls"
+    assert response.metadata["reasoning"] == "Plan: inspect files."
+    assert response.tool_calls[0].function.name == "get_weather"
+    assert response.tool_calls[0].metadata == {
+        "thought_signature": "c2lnLTEyMw=="
+    }
+
+
 @pytest.mark.asyncio
 async def test_gemini_chat_with_tools_emits_first_chunk_before_completion():
     provider = GeminiProvider()
@@ -784,6 +1215,36 @@ def close(self):
     assert response.metadata.get("streamed_content") is True


+def test_gemini_convert_messages_for_tools_reuses_thought_signature():
+    provider = GeminiProvider()
+
+    system_content, gemini_messages = provider._convert_messages_for_tools(
+        [
+            Message(
+                role="assistant",
+                content="Working on it.",
+                tool_calls=[
+                    {
+                        "id": "call_123",
+                        "type": "function",
+                        "function": {
+                            "name": "get_weather",
+                            "arguments": '{"city":"Paris"}',
+                        },
+                        "metadata": {
+                            "thought_signature": "c2lnLTEyMw==",
+                        },
+                    }
+                ],
+            )
+        ]
+    )
+
+    assert system_content == ""
+    assert gemini_messages[0].parts[1].function_call.name == "get_weather"
+    assert gemini_messages[0].parts[1].thought_signature == b"sig-123"
+
+
 @pytest.mark.asyncio
 async def test_gemini_chat_stream_yields_incrementally():
     provider = GeminiProvider()
@@ -835,6 +1296,65 @@ def close(self):
     assert second.finish_reason == "stop"


+@pytest.mark.asyncio
+async def test_gemini_chat_stream_yields_thinking_before_visible_content():
+    provider = GeminiProvider()
+    provider.api_key = "test-key"
+    provider.model = "gemini-2.5-pro"
+
+    responses = [
+        SimpleNamespace(
+            candidates=[
+                SimpleNamespace(
+                    content=SimpleNamespace(
+                        parts=[
+                            SimpleNamespace(text="Plan: inspect.", thought=True),
+                            SimpleNamespace(text="Done."),
+                        ]
+                    ),
+                    finish_reason="STOP",
+                )
+            ],
+            usage_metadata=None,
+        ),
+    ]
+
+    class _FakeGeminiClient:
+        def __init__(self, api_key: str):
+            self.aio = SimpleNamespace(
+                models=SimpleNamespace(
+                    generate_content_stream=AsyncMock(return_value=_AsyncItems(responses))
+                ),
+                aclose=AsyncMock(return_value=None),
+            )
+
+        def close(self):
+            return None
+
+    with patch("spoon_ai.llm.providers.gemini_provider.genai.Client", _FakeGeminiClient):
+        chunks = [
+            chunk
+            async for chunk in provider.chat_stream(
+                messages=[Message(role="user", content="hi")],
+                thinking_budget=128,
+            )
+        ]
+
+    assert chunks[0].delta == "Plan: inspect."
+    assert chunks[0].metadata == {
+        "chunk_index": 0,
+        "type": "thinking",
+        "phase": "think",
+        "provider": "gemini",
+        "channel": "thinking",
+    }
+    assert chunks[1].delta == "Done."
+    assert chunks[1].metadata == {
+        "chunk_index": 1,
+        "finish_reason": "stop",
+    }
+
+
 @pytest.mark.asyncio
 async def test_gemini_chat_with_tools_keeps_tool_calls_finish_reason_when_native_is_stop():
     provider = GeminiProvider()