27 changes: 18 additions & 9 deletions spoon_ai/agents/base.py
@@ -257,15 +257,24 @@ async def add_message(
elif role == "assistant":
if tool_calls:
formatted_tool_calls = [
-                    {
-                        "id": toolcall.id,
-                        "type": "function",
-                        "function": (
-                            toolcall.function.model_dump()
-                            if isinstance(toolcall.function, BaseModel)
-                            else toolcall.function
-                        )
-                    }
(
toolcall.model_dump()
if isinstance(toolcall, BaseModel)
else {
"id": toolcall.id,
"type": "function",
"function": (
toolcall.function.model_dump()
if isinstance(toolcall.function, BaseModel)
else toolcall.function
),
**(
{"metadata": getattr(toolcall, "metadata", None)}
if getattr(toolcall, "metadata", None) is not None
else {}
),
}
)
for toolcall in tool_calls
]
message = Message(
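Reviewer note: the rewritten comprehension serializes Pydantic tool calls wholesale via model_dump() and keeps the old field-by-field path for plain objects, now attaching metadata only when present. A standalone sketch of both input shapes (the Function/ToolCall models here are illustrative stand-ins, not the project's schema):

    from types import SimpleNamespace
    from typing import Optional
    from pydantic import BaseModel

    class Function(BaseModel):
        name: str
        arguments: str

    class ToolCall(BaseModel):
        id: str
        type: str = "function"
        function: Function
        metadata: Optional[dict] = None

    # Pydantic input: serialized wholesale, metadata included automatically.
    pydantic_call = ToolCall(
        id="call_1",
        function=Function(name="search", arguments="{}"),
        metadata={"trace": "abc"},
    )
    # Plain-object input: rebuilt field by field; no metadata attribute here,
    # so no metadata key is attached (mirrors the **{...} splat in the diff).
    legacy_call = SimpleNamespace(id="call_2", function={"name": "search", "arguments": "{}"})

    for toolcall in (pydantic_call, legacy_call):
        formatted = (
            toolcall.model_dump()
            if isinstance(toolcall, BaseModel)
            else {
                "id": toolcall.id,
                "type": "function",
                "function": toolcall.function,
                **({"metadata": toolcall.metadata}
                   if getattr(toolcall, "metadata", None) is not None else {}),
            }
        )
        print(formatted)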
3 changes: 3 additions & 0 deletions spoon_ai/agents/spoon_react.py
@@ -192,6 +192,7 @@ async def run(
request: Optional[str] = None,
timeout: Optional[float] = None,
thinking: bool = False,
reasoning_effort: Optional[str] = None,
) -> str:
"""Ensure prompts reflect current tools before running."""
self._refresh_prompts()
@@ -202,4 +203,6 @@
kwargs["timeout"] = timeout
if thinking:
kwargs["thinking"] = True
if reasoning_effort is not None:
kwargs["reasoning_effort"] = reasoning_effort
return await super().run(**kwargs)
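Usage sketch for the new parameter (agent construction elided; "high" is an assumed effort value, forwarded verbatim to the provider):

    async def demo(agent) -> None:
        # Unchanged default path: no thinking, provider-default effort.
        print(await agent.run(request="Summarize the latest block"))
        # Opt-in path: both knobs are forwarded only because they were set,
        # so existing callers are unaffected.
        print(await agent.run(
            request="Audit this contract for reentrancy",
            thinking=True,
            reasoning_effort="high",
        ))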
3 changes: 3 additions & 0 deletions spoon_ai/agents/spoon_react_skill.py
@@ -94,6 +94,7 @@ async def run(
request: Optional[str] = None,
timeout: Optional[float] = None,
thinking: bool = False,
reasoning_effort: Optional[str] = None,
) -> str:
"""
Execute agent with per-turn auto skill activation.
@@ -122,6 +123,8 @@ async def _runner(req: Optional[str]) -> str:
kwargs["timeout"] = timeout
if thinking:
kwargs["thinking"] = True
if reasoning_effort is not None:
kwargs["reasoning_effort"] = reasoning_effort
return await super(SpoonReactSkill, self).run(**kwargs)

return await self._run_with_auto_skills(request, _runner)
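Same conditional forwarding as spoon_react, kept inside the _runner closure so auto-activated skills see the final request first. The pattern in isolation (a standalone sketch, not the agent code itself):

    from typing import Any, Dict, Optional

    def build_run_kwargs(
        request: Optional[str],
        timeout: Optional[float],
        thinking: bool,
        reasoning_effort: Optional[str],
    ) -> Dict[str, Any]:
        # Only include knobs the caller actually set, so downstream layers
        # keep their own defaults for everything else.
        kwargs: Dict[str, Any] = {"request": request}
        if timeout is not None:
            kwargs["timeout"] = timeout
        if thinking:
            kwargs["thinking"] = True
        if reasoning_effort is not None:
            kwargs["reasoning_effort"] = reasoning_effort
        return kwargs

    assert build_run_kwargs("hi", None, False, None) == {"request": "hi"}
    assert build_run_kwargs("hi", 30.0, True, "low") == {
        "request": "hi", "timeout": 30.0, "thinking": True, "reasoning_effort": "low",
    }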
8 changes: 6 additions & 2 deletions spoon_ai/agents/toolcall.py
@@ -50,6 +50,7 @@ class ToolCallAgent(ReActAgent):

# Track last tool error for higher-level fallbacks
last_tool_error: Optional[str] = Field(default=None, exclude=True)
last_reasoning_summary: Optional[str] = Field(default=None, exclude=True)

# Reduced default timeout as per user request (blockchain operations will focus on submission)
_default_timeout: float = 120.0
@@ -122,6 +123,7 @@ async def think(
thinking: bool = False,
reasoning_effort: Optional[str] = None,
) -> bool:
self.last_reasoning_summary = None
last_role = getattr(self.memory.messages[-1], "role", None) if self.memory.messages else None
if self.next_step_prompt and last_role != "user":
await self.add_message("user", self.next_step_prompt)
@@ -228,6 +230,8 @@ def convert_mcp_tool(tool: MCPTool) -> dict:

self.tool_calls = response.tool_calls
response_metadata = getattr(response, "metadata", {}) or {}
if isinstance(response_metadata, dict):
self.last_reasoning_summary = response_metadata.get("reasoning")
streamed_content = bool(
isinstance(response_metadata, dict)
and response_metadata.get("streamed_content")
@@ -259,10 +263,10 @@ def convert_mcp_tool(tool: MCPTool) -> dict:
if response.content and not streamed_content:
self.output_queue.put_nowait(
build_output_queue_event(
event_type="thinking",
event_type="content",
delta=response.content,
metadata={
"phase": "think",
"phase": "progress",
"source": "toolcall_agent",
},
)
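Reviewer note: with this change, model prose is emitted as "content"/"progress" events rather than "thinking", so queue consumers that rendered thinking events as reasoning should switch channels; the latest reasoning summary is surfaced separately on last_reasoning_summary. A consumer sketch, assuming events are the dicts produced by build_output_queue_event with "type"/"delta" keys and a None shutdown sentinel (both assumptions, not confirmed by this diff):

    async def drain_output(agent) -> str:
        # Assumed event shape: {"type": ..., "delta": ..., "metadata": {...}}.
        parts = []
        while True:
            event = await agent.output_queue.get()
            if event is None:  # assumed end-of-run sentinel
                break
            if event.get("type") == "content":
                parts.append(event.get("delta") or "")
        # Populated by think() from response metadata; may be None.
        print("reasoning summary:", agent.last_reasoning_summary)
        return "".join(parts)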
2 changes: 2 additions & 0 deletions spoon_ai/chat.py
@@ -824,6 +824,8 @@ def _normalize_tool_request_kwargs(self, kwargs: Dict[str, Any]) -> Dict[str, An
output_config = dict(normalized.get("output_config") or {})
output_config["effort"] = anthropic_effort
normalized["output_config"] = output_config
if not thinking and self._anthropic_supports_adaptive_thinking(model):
normalized["thinking"] = {"type": "adaptive"}

if thinking:
if isinstance(thinking, dict):
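Net effect of the chat.py branch: an Anthropic effort hint rides in output_config, and absent an explicit thinking setting the request is upgraded to adaptive thinking on supported models. A standalone restatement (supports_adaptive stands in for the _anthropic_supports_adaptive_thinking check, which this diff does not show):

    from typing import Any, Dict, Optional

    def apply_effort(
        normalized: Dict[str, Any],
        anthropic_effort: Optional[str],
        thinking: Any,
        supports_adaptive: bool,
    ) -> Dict[str, Any]:
        # Effort rides in output_config; without explicit thinking we
        # default to adaptive on models that support it.
        if anthropic_effort:
            output_config = dict(normalized.get("output_config") or {})
            output_config["effort"] = anthropic_effort
            normalized["output_config"] = output_config
            if not thinking and supports_adaptive:
                normalized["thinking"] = {"type": "adaptive"}
        return normalized

    assert apply_effort({}, "high", thinking=None, supports_adaptive=True) == {
        "output_config": {"effort": "high"},
        "thinking": {"type": "adaptive"},
    }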
109 changes: 102 additions & 7 deletions spoon_ai/llm/providers/anthropic_provider.py
@@ -346,22 +346,68 @@ def get_cache_metrics(self) -> Dict[str, int]:
return self.cache_metrics.copy()

@staticmethod
-    def _normalize_thinking_param(thinking: Any) -> Optional[Dict[str, Any]]:
def _canonical_model_name(model: str) -> str:
normalized = (model or "").strip().lower().replace("_", "-").replace(".", "-")
return normalized.rsplit("/", 1)[-1]

@classmethod
def _requires_adaptive_thinking(cls, model: str) -> bool:
canonical = cls._canonical_model_name(model)
return canonical.startswith("claude-opus-4-7")

@staticmethod
def _thinking_enabled(thinking_config: Optional[Dict[str, Any]]) -> bool:
if not isinstance(thinking_config, dict):
return False
return str(thinking_config.get("type") or "").strip().lower() != "disabled"

@staticmethod
def _tool_choice_forces_tools(tool_choice: Any) -> bool:
if not tool_choice:
return False

if isinstance(tool_choice, str):
return tool_choice.strip().lower() not in {"auto", "none"}

if isinstance(tool_choice, dict):
return str(tool_choice.get("type") or "").strip().lower() not in {"", "auto", "none"}

return True

@classmethod
def _normalize_thinking_param(
cls,
thinking: Any,
*,
model: Optional[str] = None,
output_config: Any = None,
) -> Optional[Dict[str, Any]]:
"""Accept a boolean alias but send Anthropic the structured thinking object."""
requires_adaptive = bool(model) and cls._requires_adaptive_thinking(model)
has_effort = isinstance(output_config, dict) and bool(output_config.get("effort"))

if isinstance(thinking, dict):
normalized = dict(thinking)
thinking_type = str(normalized.get("type") or "").strip().lower()
if thinking_type == "adaptive":
return {"type": "adaptive"}
if thinking_type == "disabled":
return normalized
if requires_adaptive or has_effort:
return {"type": "adaptive"}
if thinking_type != "disabled":
normalized.setdefault("type", "enabled")
normalized.setdefault("budget_tokens", 1024)
return normalized
if thinking is True:
if requires_adaptive or has_effort:
return {"type": "adaptive"}
return {
"type": "enabled",
"budget_tokens": 1024,
}
if thinking is None and has_effort:
return {"type": "adaptive"}
return None

async def chat(self, messages: List[Message], **kwargs) -> LLMResponse:
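Taken together, the normalizer now has four distinct outcomes. Spot checks restating the logic above (the AnthropicProvider class name and the claude-opus-4.7 model string are assumptions; the diff shows only the methods):

    norm = AnthropicProvider._normalize_thinking_param  # assumed class name

    # Boolean alias becomes the structured object with a default budget.
    assert norm(True) == {"type": "enabled", "budget_tokens": 1024}
    # An effort hint alone upgrades the request to adaptive thinking.
    assert norm(None, output_config={"effort": "high"}) == {"type": "adaptive"}
    # Explicit disable passes through untouched.
    assert norm({"type": "disabled"}) == {"type": "disabled"}
    # Adaptive-only models coerce any enabled request to adaptive.
    assert norm(True, model="claude-opus-4.7") == {"type": "adaptive"}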
@@ -389,16 +435,22 @@ async def chat(self, messages: List[Message], **kwargs) -> LLMResponse:
k: v for k, v in kwargs.items() if k not in ['model', 'max_tokens', 'temperature']
}
thinking_config = self._normalize_thinking_param(
extra_request_kwargs.pop("thinking", None)
extra_request_kwargs.pop("thinking", None),
model=model,
output_config=extra_request_kwargs.get("output_config"),
)
thinking_enabled = self._thinking_enabled(thinking_config)
if thinking_enabled:
extra_request_kwargs.pop("top_k", None)

request_params = {
'model': model,
'max_tokens': max_tokens,
-            'temperature': temperature,
'messages': anthropic_messages,
**extra_request_kwargs,
}
if not thinking_enabled:
request_params['temperature'] = temperature
if thinking_config is not None:
request_params['thinking'] = thinking_config
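Context for moving temperature out of the dict: Anthropic rejects sampling overrides such as temperature and top_k when extended thinking is enabled, so both are now attached only on non-thinking requests. The guard in isolation:

    def finalize_request(params: dict, temperature: float,
                         thinking_config, thinking_enabled: bool) -> dict:
        # Sampling knobs only on non-thinking requests; the thinking block
        # is attached whenever the normalizer produced one.
        if not thinking_enabled:
            params["temperature"] = temperature
        if thinking_config is not None:
            params["thinking"] = thinking_config
        return params

    assert "temperature" not in finalize_request({}, 0.3, {"type": "adaptive"}, True)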

@@ -453,16 +505,22 @@ async def chat_stream(self, messages: List[Message],callbacks: Optional[List] =
if k not in ['model', 'max_tokens', 'temperature', 'callbacks']
}
thinking_config = self._normalize_thinking_param(
extra_request_kwargs.pop("thinking", None)
extra_request_kwargs.pop("thinking", None),
model=model,
output_config=extra_request_kwargs.get("output_config"),
)
thinking_enabled = self._thinking_enabled(thinking_config)
if thinking_enabled:
extra_request_kwargs.pop("top_k", None)

request_params = {
'model': model,
'max_tokens': max_tokens,
-            'temperature': temperature,
'messages': anthropic_messages,
**extra_request_kwargs,
}
if not thinking_enabled:
request_params['temperature'] = temperature
if thinking_config is not None:
request_params['thinking'] = thinking_config

@@ -472,6 +530,7 @@

# Process streaming response
full_content = ""
full_reasoning = ""
chunk_index = 0
finish_reason = None
usage_data = None
Expand Down Expand Up @@ -506,6 +565,30 @@ async def chat_stream(self, messages: List[Message],callbacks: Optional[List] =
)
chunk_index += 1
yield response_chunk
elif chunk.type == "content_block_delta" and chunk.delta.type == "thinking_delta":
token = getattr(chunk.delta, "thinking", "") or ""
if not token:
continue
full_reasoning += token
yield LLMResponseChunk(
content=full_reasoning,
delta=token,
provider="anthropic",
model=model,
finish_reason=finish_reason,
tool_calls=[],
usage=usage_data,
metadata={
"chunk_index": chunk_index,
"chunk_type": chunk.type,
"type": "thinking",
"phase": "think",
"provider": "anthropic",
"channel": "thinking",
},
chunk_index=chunk_index,
)
chunk_index += 1

elif chunk.type == "message_start":
if hasattr(chunk, 'message') and hasattr(chunk.message, 'usage'):
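The new thinking_delta branch interleaves reasoning chunks with text chunks on the same stream, tagged via metadata ("channel": "thinking"). A minimal consumer sketch that splits the two channels (chunk attributes per LLMResponseChunk as used above):

    async def split_stream(stream) -> tuple:
        reasoning_parts, answer_parts = [], []
        async for chunk in stream:
            if (chunk.metadata or {}).get("channel") == "thinking":
                reasoning_parts.append(chunk.delta or "")  # reasoning tokens
            else:
                answer_parts.append(chunk.delta or "")     # user-visible text
        return "".join(reasoning_parts), "".join(answer_parts)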
@@ -592,22 +675,34 @@ async def chat_with_tools(self, messages: List[Message], tools: List[Dict], **kw
if k not in ['model', 'max_tokens', 'temperature', 'tool_choice', 'output_queue']
}
thinking_config = self._normalize_thinking_param(
extra_request_kwargs.pop("thinking", None)
extra_request_kwargs.pop("thinking", None),
model=model,
output_config=extra_request_kwargs.get("output_config"),
)
thinking_enabled = self._thinking_enabled(thinking_config)
if thinking_enabled:
extra_request_kwargs.pop("top_k", None)

request_params = {
'model': model,
'max_tokens': max_tokens,
-            'temperature': temperature,
'messages': anthropic_messages,
'tools': anthropic_tools,
**extra_request_kwargs,
}
if not thinking_enabled:
request_params['temperature'] = temperature
if thinking_config is not None:
request_params['thinking'] = thinking_config

# Anthropic expects tool_choice as an object, not a plain string/enum
if tool_choice:
if thinking_enabled and self._tool_choice_forces_tools(tool_choice):
logger.warning(
"Anthropic thinking mode does not support forced tool_choice=%r; falling back to auto",
tool_choice,
)
tool_choice = None
if isinstance(tool_choice, str):
request_params['tool_choice'] = {"type": tool_choice}
elif isinstance(tool_choice, dict):
Expand Down
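Anthropic documents that forced tool use is incompatible with extended thinking, which is what the warning-and-fallback above guards against. Spot checks of the predicate (AnthropicProvider class name assumed, as before):

    forces = AnthropicProvider._tool_choice_forces_tools

    assert forces(None) is False
    assert forces("auto") is False
    assert forces({"type": "none"}) is False
    assert forces("any") is True                               # force any tool
    assert forces({"type": "tool", "name": "search"}) is True  # force one tool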