diff --git a/spoon_ai/agents/base.py b/spoon_ai/agents/base.py
index b19f807..ee57e14 100644
--- a/spoon_ai/agents/base.py
+++ b/spoon_ai/agents/base.py
@@ -257,15 +257,24 @@ async def add_message(
         elif role == "assistant":
             if tool_calls:
                 formatted_tool_calls = [
-                    {
-                        "id": toolcall.id,
-                        "type": "function",
-                        "function": (
-                            toolcall.function.model_dump()
-                            if isinstance(toolcall.function, BaseModel)
-                            else toolcall.function
-                        )
-                    }
+                    (
+                        toolcall.model_dump()
+                        if isinstance(toolcall, BaseModel)
+                        else {
+                            "id": toolcall.id,
+                            "type": "function",
+                            "function": (
+                                toolcall.function.model_dump()
+                                if isinstance(toolcall.function, BaseModel)
+                                else toolcall.function
+                            ),
+                            **(
+                                {"metadata": getattr(toolcall, "metadata", None)}
+                                if getattr(toolcall, "metadata", None) is not None
+                                else {}
+                            ),
+                        }
+                    )
                     for toolcall in tool_calls
                 ]
                 message = Message(
diff --git a/spoon_ai/agents/spoon_react.py b/spoon_ai/agents/spoon_react.py
index 5061cad..30ada7b 100644
--- a/spoon_ai/agents/spoon_react.py
+++ b/spoon_ai/agents/spoon_react.py
@@ -192,6 +192,7 @@ async def run(
         request: Optional[str] = None,
         timeout: Optional[float] = None,
         thinking: bool = False,
+        reasoning_effort: Optional[str] = None,
     ) -> str:
         """Ensure prompts reflect current tools before running."""
         self._refresh_prompts()
@@ -202,4 +203,6 @@
             kwargs["timeout"] = timeout
         if thinking:
             kwargs["thinking"] = True
+        if reasoning_effort is not None:
+            kwargs["reasoning_effort"] = reasoning_effort
         return await super().run(**kwargs)
diff --git a/spoon_ai/agents/spoon_react_skill.py b/spoon_ai/agents/spoon_react_skill.py
index 12f49a1..9869fde 100644
--- a/spoon_ai/agents/spoon_react_skill.py
+++ b/spoon_ai/agents/spoon_react_skill.py
@@ -94,6 +94,7 @@ async def run(
         request: Optional[str] = None,
         timeout: Optional[float] = None,
         thinking: bool = False,
+        reasoning_effort: Optional[str] = None,
     ) -> str:
         """
         Execute agent with per-turn auto skill activation.
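# --- Illustrative aside (not part of the patch) -----------------------------
# A minimal sketch of what the add_message formatting change above preserves,
# assuming the ToolCall/Function models from spoon_ai.schema as extended in
# this diff: dumping the whole pydantic model keeps the new optional
# `metadata` field, so provider hints such as Gemini thought signatures
# survive into agent memory.
from spoon_ai.schema import Function, ToolCall

call = ToolCall(
    id="call_abc",
    type="function",
    function=Function(name="get_weather", arguments='{"city": "Paris"}'),
    metadata={"thought_signature": "c2lnLTEyMw=="},
)
assert call.model_dump()["metadata"] == {"thought_signature": "c2lnLTEyMw=="}
# -----------------------------------------------------------------------------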
@@ -122,6 +123,8 @@ async def _runner(req: Optional[str]) -> str: kwargs["timeout"] = timeout if thinking: kwargs["thinking"] = True + if reasoning_effort is not None: + kwargs["reasoning_effort"] = reasoning_effort return await super(SpoonReactSkill, self).run(**kwargs) return await self._run_with_auto_skills(request, _runner) diff --git a/spoon_ai/agents/toolcall.py b/spoon_ai/agents/toolcall.py index 20ea253..6676659 100644 --- a/spoon_ai/agents/toolcall.py +++ b/spoon_ai/agents/toolcall.py @@ -50,6 +50,7 @@ class ToolCallAgent(ReActAgent): # Track last tool error for higher-level fallbacks last_tool_error: Optional[str] = Field(default=None, exclude=True) + last_reasoning_summary: Optional[str] = Field(default=None, exclude=True) # Reduced default timeout as per user request (blockchain operations will focus on submission) _default_timeout: float = 120.0 @@ -122,6 +123,7 @@ async def think( thinking: bool = False, reasoning_effort: Optional[str] = None, ) -> bool: + self.last_reasoning_summary = None last_role = getattr(self.memory.messages[-1], "role", None) if self.memory.messages else None if self.next_step_prompt and last_role != "user": await self.add_message("user", self.next_step_prompt) @@ -228,6 +230,8 @@ def convert_mcp_tool(tool: MCPTool) -> dict: self.tool_calls = response.tool_calls response_metadata = getattr(response, "metadata", {}) or {} + if isinstance(response_metadata, dict): + self.last_reasoning_summary = response_metadata.get("reasoning") streamed_content = bool( isinstance(response_metadata, dict) and response_metadata.get("streamed_content") @@ -259,10 +263,10 @@ def convert_mcp_tool(tool: MCPTool) -> dict: if response.content and not streamed_content: self.output_queue.put_nowait( build_output_queue_event( - event_type="thinking", + event_type="content", delta=response.content, metadata={ - "phase": "think", + "phase": "progress", "source": "toolcall_agent", }, ) diff --git a/spoon_ai/chat.py b/spoon_ai/chat.py index 44eb411..c34853d 100644 --- a/spoon_ai/chat.py +++ b/spoon_ai/chat.py @@ -824,6 +824,8 @@ def _normalize_tool_request_kwargs(self, kwargs: Dict[str, Any]) -> Dict[str, An output_config = dict(normalized.get("output_config") or {}) output_config["effort"] = anthropic_effort normalized["output_config"] = output_config + if not thinking and self._anthropic_supports_adaptive_thinking(model): + normalized["thinking"] = {"type": "adaptive"} if thinking: if isinstance(thinking, dict): diff --git a/spoon_ai/llm/providers/anthropic_provider.py b/spoon_ai/llm/providers/anthropic_provider.py index 6b7849f..a139bca 100644 --- a/spoon_ai/llm/providers/anthropic_provider.py +++ b/spoon_ai/llm/providers/anthropic_provider.py @@ -346,22 +346,68 @@ def get_cache_metrics(self) -> Dict[str, int]: return self.cache_metrics.copy() @staticmethod - def _normalize_thinking_param(thinking: Any) -> Optional[Dict[str, Any]]: + def _canonical_model_name(model: str) -> str: + normalized = (model or "").strip().lower().replace("_", "-").replace(".", "-") + return normalized.rsplit("/", 1)[-1] + + @classmethod + def _requires_adaptive_thinking(cls, model: str) -> bool: + canonical = cls._canonical_model_name(model) + return canonical.startswith("claude-opus-4-7") + + @staticmethod + def _thinking_enabled(thinking_config: Optional[Dict[str, Any]]) -> bool: + if not isinstance(thinking_config, dict): + return False + return str(thinking_config.get("type") or "").strip().lower() != "disabled" + + @staticmethod + def _tool_choice_forces_tools(tool_choice: Any) -> bool: + if not 
tool_choice: + return False + + if isinstance(tool_choice, str): + return tool_choice.strip().lower() not in {"auto", "none"} + + if isinstance(tool_choice, dict): + return str(tool_choice.get("type") or "").strip().lower() not in {"", "auto", "none"} + + return True + + @classmethod + def _normalize_thinking_param( + cls, + thinking: Any, + *, + model: Optional[str] = None, + output_config: Any = None, + ) -> Optional[Dict[str, Any]]: """Accept a boolean alias but send Anthropic the structured thinking object.""" + requires_adaptive = bool(model) and cls._requires_adaptive_thinking(model) + has_effort = isinstance(output_config, dict) and bool(output_config.get("effort")) + if isinstance(thinking, dict): normalized = dict(thinking) thinking_type = str(normalized.get("type") or "").strip().lower() if thinking_type == "adaptive": return {"type": "adaptive"} + if thinking_type == "disabled": + return normalized + if requires_adaptive or has_effort: + return {"type": "adaptive"} if thinking_type != "disabled": normalized.setdefault("type", "enabled") normalized.setdefault("budget_tokens", 1024) return normalized if thinking is True: + if requires_adaptive or has_effort: + return {"type": "adaptive"} return { "type": "enabled", "budget_tokens": 1024, } + if thinking is None and has_effort: + return {"type": "adaptive"} return None async def chat(self, messages: List[Message], **kwargs) -> LLMResponse: @@ -389,16 +435,22 @@ async def chat(self, messages: List[Message], **kwargs) -> LLMResponse: k: v for k, v in kwargs.items() if k not in ['model', 'max_tokens', 'temperature'] } thinking_config = self._normalize_thinking_param( - extra_request_kwargs.pop("thinking", None) + extra_request_kwargs.pop("thinking", None), + model=model, + output_config=extra_request_kwargs.get("output_config"), ) + thinking_enabled = self._thinking_enabled(thinking_config) + if thinking_enabled: + extra_request_kwargs.pop("top_k", None) request_params = { 'model': model, 'max_tokens': max_tokens, - 'temperature': temperature, 'messages': anthropic_messages, **extra_request_kwargs, } + if not thinking_enabled: + request_params['temperature'] = temperature if thinking_config is not None: request_params['thinking'] = thinking_config @@ -453,16 +505,22 @@ async def chat_stream(self, messages: List[Message],callbacks: Optional[List] = if k not in ['model', 'max_tokens', 'temperature', 'callbacks'] } thinking_config = self._normalize_thinking_param( - extra_request_kwargs.pop("thinking", None) + extra_request_kwargs.pop("thinking", None), + model=model, + output_config=extra_request_kwargs.get("output_config"), ) + thinking_enabled = self._thinking_enabled(thinking_config) + if thinking_enabled: + extra_request_kwargs.pop("top_k", None) request_params = { 'model': model, 'max_tokens': max_tokens, - 'temperature': temperature, 'messages': anthropic_messages, **extra_request_kwargs, } + if not thinking_enabled: + request_params['temperature'] = temperature if thinking_config is not None: request_params['thinking'] = thinking_config @@ -472,6 +530,7 @@ async def chat_stream(self, messages: List[Message],callbacks: Optional[List] = # Process streaming response full_content = "" + full_reasoning = "" chunk_index = 0 finish_reason = None usage_data = None @@ -506,6 +565,30 @@ async def chat_stream(self, messages: List[Message],callbacks: Optional[List] = ) chunk_index += 1 yield response_chunk + elif chunk.type == "content_block_delta" and chunk.delta.type == "thinking_delta": + token = getattr(chunk.delta, "thinking", "") or "" 
+ if not token: + continue + full_reasoning += token + yield LLMResponseChunk( + content=full_reasoning, + delta=token, + provider="anthropic", + model=model, + finish_reason=finish_reason, + tool_calls=[], + usage=usage_data, + metadata={ + "chunk_index": chunk_index, + "chunk_type": chunk.type, + "type": "thinking", + "phase": "think", + "provider": "anthropic", + "channel": "thinking", + }, + chunk_index=chunk_index, + ) + chunk_index += 1 elif chunk.type == "message_start": if hasattr(chunk, 'message') and hasattr(chunk.message, 'usage'): @@ -592,22 +675,34 @@ async def chat_with_tools(self, messages: List[Message], tools: List[Dict], **kw if k not in ['model', 'max_tokens', 'temperature', 'tool_choice', 'output_queue'] } thinking_config = self._normalize_thinking_param( - extra_request_kwargs.pop("thinking", None) + extra_request_kwargs.pop("thinking", None), + model=model, + output_config=extra_request_kwargs.get("output_config"), ) + thinking_enabled = self._thinking_enabled(thinking_config) + if thinking_enabled: + extra_request_kwargs.pop("top_k", None) request_params = { 'model': model, 'max_tokens': max_tokens, - 'temperature': temperature, 'messages': anthropic_messages, 'tools': anthropic_tools, **extra_request_kwargs, } + if not thinking_enabled: + request_params['temperature'] = temperature if thinking_config is not None: request_params['thinking'] = thinking_config # Anthropic expects tool_choice as an object, not a plain string/enum if tool_choice: + if thinking_enabled and self._tool_choice_forces_tools(tool_choice): + logger.warning( + "Anthropic thinking mode does not support forced tool_choice=%r; falling back to auto", + tool_choice, + ) + tool_choice = None if isinstance(tool_choice, str): request_params['tool_choice'] = {"type": tool_choice} elif isinstance(tool_choice, dict): diff --git a/spoon_ai/llm/providers/gemini_provider.py b/spoon_ai/llm/providers/gemini_provider.py index b46d76e..a2ada6c 100644 --- a/spoon_ai/llm/providers/gemini_provider.py +++ b/spoon_ai/llm/providers/gemini_provider.py @@ -34,6 +34,7 @@ import base64 import re from spoon_ai.callbacks.manager import CallbackManager +from spoon_ai.utils.streaming import build_output_queue_event from ..interface import LLMProviderInterface, LLMResponse, ProviderMetadata, ProviderCapability from ..errors import ProviderError, AuthenticationError, RateLimitError, ModelNotFoundError, NetworkError from ..message_utils import drop_orphaned_tool_messages @@ -243,8 +244,12 @@ def _apply_thinking_defaults( if budget_int < min_thinking_budget: budget_int = min_thinking_budget + include_thoughts = getattr(thinking_cfg, "include_thoughts", None) + if include_thoughts is None: + include_thoughts = True + return max_tokens, types.ThinkingConfig( - include_thoughts=getattr(thinking_cfg, "include_thoughts", None), + include_thoughts=include_thoughts, thinking_level=getattr(thinking_cfg, "thinking_level", None), thinking_budget=budget_int, ) @@ -262,7 +267,96 @@ def _apply_thinking_defaults( if thinking_budget_int < min_thinking_budget: thinking_budget_int = min_thinking_budget - return max_tokens, types.ThinkingConfig(thinking_budget=thinking_budget_int) + return max_tokens, types.ThinkingConfig( + thinking_budget=thinking_budget_int, + include_thoughts=True, + ) + + @staticmethod + def _encode_thought_signature(thought_signature: Any) -> Optional[str]: + if thought_signature is None: + return None + if isinstance(thought_signature, str): + return thought_signature + if isinstance(thought_signature, (bytes, bytearray)): + 
return base64.b64encode(bytes(thought_signature)).decode("ascii") + return None + + @staticmethod + def _decode_thought_signature(thought_signature: Any) -> Optional[bytes]: + if thought_signature is None: + return None + if isinstance(thought_signature, bytes): + return thought_signature + if isinstance(thought_signature, bytearray): + return bytes(thought_signature) + if isinstance(thought_signature, str): + try: + return base64.b64decode(thought_signature) + except Exception: + return None + return None + + @classmethod + def _tool_call_from_function_call( + cls, + function_call: Any, + *, + thought_signature: Any = None, + ) -> ToolCall: + arguments_json = json.dumps(function_call.args) if function_call.args else "{}" + encoded_signature = cls._encode_thought_signature(thought_signature) + metadata = {"thought_signature": encoded_signature} if encoded_signature else None + return ToolCall( + id=f"call_{uuid.uuid4().hex[:8]}", + type="function", + function=Function( + name=function_call.name, + arguments=arguments_json, + ), + metadata=metadata, + ) + + @classmethod + def _function_call_part_from_tool_call(cls, tool_call: ToolCall) -> types.Part: + part = types.Part.from_function_call( + name=tool_call.function.name, + args=tool_call.function.get_arguments_dict(), + ) + thought_signature = cls._decode_thought_signature( + getattr(tool_call, "metadata", None) and tool_call.metadata.get("thought_signature") + ) + if thought_signature is not None: + part.thought_signature = thought_signature + return part + + @classmethod + def _extract_stream_parts( + cls, + parts: List[Any], + ) -> tuple[str, str, List[ToolCall]]: + thought_delta = "" + visible_delta = "" + tool_calls: List[ToolCall] = [] + + for part in parts: + text = getattr(part, "text", None) + if text: + if getattr(part, "thought", False): + thought_delta += text + else: + visible_delta += text + + function_call = getattr(part, "function_call", None) + if function_call: + tool_calls.append( + cls._tool_call_from_function_call( + function_call, + thought_signature=getattr(part, "thought_signature", None), + ) + ) + + return thought_delta, visible_delta, tool_calls async def initialize(self, config: Dict[str, Any]) -> None: """Initialize the Gemini provider with configuration.""" @@ -551,11 +645,7 @@ def _convert_messages_for_tools(self, messages: List[Message]) -> tuple[Optional parts.append(types.Part.from_text(text=message.content)) for tool_call in message.tool_calls: - args = tool_call.function.get_arguments_dict() - parts.append(types.Part.from_function_call( - name=tool_call.function.name, - args=args - )) + parts.append(self._function_call_part_from_tool_call(tool_call)) gemini_messages.append(types.Content( role="model", @@ -834,6 +924,7 @@ async def chat_stream(self, messages: List[Message],callbacks: Optional[List] = # Process streaming response full_content = "" + full_reasoning = "" chunk_index = 0 finish_reason = None native_finish_reason = None @@ -853,6 +944,7 @@ async def chat_stream(self, messages: List[Message],callbacks: Optional[List] = ) async for part_response in stream: + thinking_chunk = "" chunk = "" try: if ( @@ -862,19 +954,41 @@ async def chat_stream(self, messages: List[Message],callbacks: Optional[List] = and getattr(part_response.candidates[0].content, "parts", None) ): parts = part_response.candidates[0].content.parts - chunk = "".join( - [p.text for p in parts if getattr(p, "text", None)] - ) + thinking_chunk, chunk, _ = self._extract_stream_parts(parts) except Exception: + thinking_chunk = "" chunk 
= "" # Fallback: some SDK responses expose streaming text via `part_response.text` - if not chunk: + if not thinking_chunk and not chunk: maybe_text = getattr(part_response, "text", None) if isinstance(maybe_text, str): chunk = maybe_text + if thinking_chunk: + full_reasoning += thinking_chunk + yield LLMResponseChunk( + content=full_reasoning, + delta=thinking_chunk, + provider="gemini", + model=model, + finish_reason=None, + tool_calls=[], + usage=usage_data, + metadata={ + "chunk_index": chunk_index, + "type": "thinking", + "phase": "think", + "provider": "gemini", + "channel": "thinking", + }, + chunk_index=chunk_index, + ) + chunk_index += 1 + if not chunk: + if thinking_chunk: + continue continue full_content += chunk @@ -1013,13 +1127,14 @@ async def chat_with_tools(self, messages: List[Message], tools: List[Dict], **kw ) full_content = "" + full_reasoning = "" emitted_chunk_count = 0 tool_calls: List[ToolCall] = [] finish_reason = "stop" native_finish_reason = "stop" async for part_response in stream: - # Incremental text + thinking_text = "" delta_text = "" try: if ( @@ -1028,37 +1143,52 @@ async def chat_with_tools(self, messages: List[Message], tools: List[Dict], **kw and getattr(part_response.candidates[0], "content", None) is not None and getattr(part_response.candidates[0].content, "parts", None) ): - for part in part_response.candidates[0].content.parts: - text = getattr(part, "text", None) - if text: - delta_text += text - function_call = getattr(part, "function_call", None) - if function_call: - arguments_json = json.dumps(function_call.args) if function_call.args else "{}" - tool_calls.append( - ToolCall( - id=f"call_{uuid.uuid4().hex[:8]}", - type="function", - function=Function( - name=function_call.name, - arguments=arguments_json, - ), - ) - ) - finish_reason = "tool_calls" + thinking_text, delta_text, new_tool_calls = self._extract_stream_parts( + part_response.candidates[0].content.parts + ) + if new_tool_calls: + tool_calls.extend(new_tool_calls) + finish_reason = "tool_calls" except Exception: + thinking_text = "" delta_text = "" - if not delta_text: + if not thinking_text and not delta_text: maybe_text = getattr(part_response, "text", None) if isinstance(maybe_text, str): delta_text = maybe_text + if thinking_text: + full_reasoning += thinking_text + try: + output_queue.put_nowait( + build_output_queue_event( + event_type="thinking", + delta=thinking_text, + metadata={ + "phase": "think", + "provider": "gemini", + "channel": "thinking", + }, + ) + ) + except Exception: + pass + if delta_text: full_content += delta_text emitted_chunk_count += 1 try: - output_queue.put_nowait({"content": delta_text}) + output_queue.put_nowait( + build_output_queue_event( + event_type="content", + delta=delta_text, + metadata={ + "provider": "gemini", + "channel": "text", + }, + ) + ) except Exception: pass @@ -1084,6 +1214,7 @@ async def chat_with_tools(self, messages: List[Message], tools: List[Dict], **kw metadata={ "streamed_content": emitted_chunk_count > 0, "stream_chunk_count": emitted_chunk_count, + **({"reasoning": full_reasoning} if full_reasoning else {}), }, ) @@ -1142,6 +1273,8 @@ def _convert_response(self, response, duration: float, *, model: Optional[str] = response_text = "" image_paths = [] tool_calls = [] + reasoning = "" + saw_parts = False # Check if there are candidate results if hasattr(response, "candidates") and response.candidates: @@ -1149,10 +1282,16 @@ def _convert_response(self, response, duration: float, *, model: Optional[str] = if hasattr(candidate, 
"content") and candidate.content: # Iterate through all parts parts = getattr(candidate.content, "parts", None) or [] + saw_parts = bool(parts) for part in parts: # Check if there is text content if hasattr(part, "text") and part.text: - if content: + if getattr(part, "thought", False): + if reasoning: + reasoning += "\n" + part.text + else: + reasoning = part.text + elif content: content += "\n" + part.text else: content = part.text @@ -1205,7 +1344,7 @@ def _convert_response(self, response, duration: float, *, model: Optional[str] = # If no text content was obtained, fall back to response.text (if available). # Some Gemini SDK responses keep `response.text` populated even when # `candidates[0].content.parts` is empty. - if not content: + if not content and not saw_parts and not reasoning: fallback_text = self._safe_get_response_text(response) if fallback_text: response_text = fallback_text @@ -1229,7 +1368,8 @@ def _convert_response(self, response, duration: float, *, model: Optional[str] = duration=duration, metadata={ "image_paths": image_paths, - "has_images": len(image_paths) > 0 + "has_images": len(image_paths) > 0, + **({"reasoning": reasoning} if reasoning else {}), } ) @@ -1238,6 +1378,8 @@ def _convert_tool_response(self, response, duration: float, *, model: Optional[s content = "" tool_calls = [] finish_reason = "stop" + reasoning = "" + saw_parts = False # Check if there are candidate results if hasattr(response, "candidates") and response.candidates: @@ -1245,39 +1387,30 @@ def _convert_tool_response(self, response, duration: float, *, model: Optional[s if hasattr(candidate, "content") and candidate.content: # Iterate through all parts parts = getattr(candidate.content, "parts", None) or [] + saw_parts = bool(parts) for part in parts: # Check if there is text content if hasattr(part, "text") and part.text: - if content: + if getattr(part, "thought", False): + if reasoning: + reasoning += "\n" + part.text + else: + reasoning = part.text + elif content: content += "\n" + part.text else: content = part.text # Check if there is a function call elif hasattr(part, "function_call") and part.function_call: - # Convert Gemini function call to our ToolCall format - function_call = part.function_call - - # Generate a unique ID for the tool call - import uuid - tool_call_id = f"call_{uuid.uuid4().hex[:8]}" - - # Convert arguments to JSON string - import json - arguments_json = json.dumps(function_call.args) if function_call.args else "{}" - - tool_call = ToolCall( - id=tool_call_id, - type="function", - function=Function( - name=function_call.name, - arguments=arguments_json - ) + tool_call = self._tool_call_from_function_call( + part.function_call, + thought_signature=getattr(part, "thought_signature", None), ) tool_calls.append(tool_call) finish_reason = "tool_calls" # If no content was obtained, fall back to response.text (if available). 
- if not content: + if not content and not saw_parts and not reasoning: fallback_text = self._safe_get_response_text(response) if fallback_text: content = fallback_text @@ -1290,7 +1423,9 @@ def _convert_tool_response(self, response, duration: float, *, model: Optional[s native_finish_reason=finish_reason, tool_calls=tool_calls, duration=duration, - metadata={} + metadata={ + **({"reasoning": reasoning} if reasoning else {}), + } ) def get_metadata(self) -> ProviderMetadata: diff --git a/spoon_ai/llm/providers/openai_compatible_provider.py b/spoon_ai/llm/providers/openai_compatible_provider.py index a717027..85ae050 100644 --- a/spoon_ai/llm/providers/openai_compatible_provider.py +++ b/spoon_ai/llm/providers/openai_compatible_provider.py @@ -55,16 +55,25 @@ def _uses_completion_token_param(self, model: str) -> bool: tail = model_lower.split("/")[-1] # strip any provider prefix like openrouter return tail.startswith("gpt-5") or tail.startswith("o1") or tail.startswith("o3") or tail.startswith("o4") - def _supports_temperature(self, model: str) -> bool: + def _supports_temperature(self, model: str, reasoning_effort: str | None = None) -> bool: """Whether this model supports custom temperature values. - Some newer OpenAI models (gpt-5.1*, o1*, o3*, o4*) only support the - default temperature value (1.0) and will error on other values. + GPT-5 reasoning models reject explicit temperature settings more often + than the legacy chat-completions family: + - `gpt-5` and size-suffixed variants like `gpt-5-mini`: no temperature support + - versioned GPT-5 models like `gpt-5.4`: temperature only when reasoning is disabled + - `o*`: no temperature support """ model_lower = (model or "").lower() tail = model_lower.split("/")[-1] # strip any provider prefix - # gpt-5.1 and reasoning models don't support custom temperature - return not (tail.startswith("gpt-5.1") or tail.startswith("o1") or tail.startswith("o3") or tail.startswith("o4")) + if tail.startswith("o1") or tail.startswith("o3") or tail.startswith("o4"): + return False + if tail.startswith("gpt-5."): + normalized_effort = str(reasoning_effort or "").strip().lower() + return normalized_effort in {"", "none"} + if tail == "gpt-5" or tail.startswith("gpt-5-"): + return False + return True def _max_token_kwargs(self, model: str, max_tokens: int, overrides: Dict[str, Any]) -> Dict[str, Any]: """ @@ -833,7 +842,7 @@ async def chat(self, messages: List[Message], **kwargs) -> LLMResponse: "stream": False, } # Only add temperature for models that support it - if self._supports_temperature(model): + if self._supports_temperature(model, kwargs.get("reasoning_effort")): request_kwargs["temperature"] = temperature request_kwargs.update(self._max_token_kwargs(model, max_tokens, kwargs)) @@ -841,7 +850,10 @@ async def chat(self, messages: List[Message], **kwargs) -> LLMResponse: request_kwargs["tools"] = tools request_kwargs["tool_choice"] = tool_choice - extra_keys = {'model', 'max_tokens', 'max_completion_tokens', 'temperature', 'thinking', 'tools', 'tool_choice'} + extra_keys = { + 'model', 'max_tokens', 'max_completion_tokens', 'temperature', + 'thinking', 'reasoning_effort', 'tools', 'tool_choice' + } request_kwargs.update({k: v for k, v in kwargs.items() if k not in extra_keys}) self._apply_reasoning_defaults(request_kwargs, kwargs) @@ -893,7 +905,7 @@ async def chat_stream(self,messages: List[Message],callbacks: Optional[List[Base "stream_options": {"include_usage": True}, } # Only add temperature for models that support it - if 
self._supports_temperature(model): + if self._supports_temperature(model, kwargs.get("reasoning_effort")): request_kwargs["temperature"] = temperature request_kwargs.update(self._max_token_kwargs(model, max_tokens, kwargs)) @@ -901,7 +913,10 @@ async def chat_stream(self,messages: List[Message],callbacks: Optional[List[Base request_kwargs["tools"] = tools request_kwargs["tool_choice"] = tool_choice - extra_keys = {'model', 'max_tokens', 'max_completion_tokens', 'temperature', 'thinking', 'callbacks', 'tools', 'tool_choice'} + extra_keys = { + 'model', 'max_tokens', 'max_completion_tokens', 'temperature', + 'thinking', 'reasoning_effort', 'callbacks', 'tools', 'tool_choice' + } request_kwargs.update({k: v for k, v in kwargs.items() if k not in extra_keys}) self._apply_reasoning_defaults(request_kwargs, kwargs) @@ -1104,7 +1119,7 @@ async def chat_with_tools(self, messages: List[Message], tools: List[Dict], **kw "messages": openai_messages, } # Only add temperature for models that support it - if self._supports_temperature(model): + if self._supports_temperature(model, kwargs.get("reasoning_effort")): request_kwargs["temperature"] = temperature request_kwargs.update(self._max_token_kwargs(model, max_tokens, kwargs)) @@ -1112,7 +1127,10 @@ async def chat_with_tools(self, messages: List[Message], tools: List[Dict], **kw request_kwargs["tools"] = tools request_kwargs["tool_choice"] = tool_choice - extra_keys = {'model', 'max_tokens', 'max_completion_tokens', 'temperature', 'thinking', 'tool_choice', 'output_queue'} + extra_keys = { + 'model', 'max_tokens', 'max_completion_tokens', 'temperature', + 'thinking', 'reasoning_effort', 'tool_choice', 'output_queue' + } request_kwargs.update({k: v for k, v in kwargs.items() if k not in extra_keys}) self._apply_reasoning_defaults(request_kwargs, kwargs) diff --git a/spoon_ai/llm/providers/openai_provider.py b/spoon_ai/llm/providers/openai_provider.py index 34dfc45..4770e20 100644 --- a/spoon_ai/llm/providers/openai_provider.py +++ b/spoon_ai/llm/providers/openai_provider.py @@ -2,10 +2,22 @@ OpenAI Provider implementation for the unified LLM interface. 
""" -from typing import Dict, Any +from __future__ import annotations + +import asyncio +import json +from datetime import datetime from logging import getLogger +from typing import Any, AsyncIterator, Dict, List, Optional +from uuid import uuid4 + +from spoon_ai.callbacks.base import BaseCallbackHandler +from spoon_ai.callbacks.manager import CallbackManager +from spoon_ai.schema import LLMResponseChunk, Message, ToolCall, Function +from spoon_ai.utils.streaming import build_output_queue_event -from ..interface import ProviderMetadata, ProviderCapability +from ..errors import ProviderError +from ..interface import LLMResponse, ProviderMetadata, ProviderCapability from ..registry import register_provider from .openai_compatible_provider import OpenAICompatibleProvider @@ -20,13 +32,574 @@ ]) class OpenAIProvider(OpenAICompatibleProvider): """OpenAI provider implementation.""" - + def __init__(self): super().__init__() self.provider_name = "openai" self.default_base_url = "https://api.openai.com/v1" self.default_model = "gpt-4.1" - + + @staticmethod + def _stringify_responses_content(content: Any) -> str: + if isinstance(content, str): + return content + if content is None: + return "" + if isinstance(content, (dict, list)): + try: + return json.dumps(content, ensure_ascii=False) + except TypeError: + return str(content) + return str(content) + + def _supports_responses_reasoning( + self, + messages: List[Message], + tools: List[Dict[str, Any]] | None, + kwargs: Dict[str, Any], + ) -> bool: + if not (kwargs.get("thinking") or kwargs.get("reasoning_effort")): + return False + try: + openai_messages = self._convert_messages(messages) + self._convert_messages_to_responses_input(openai_messages) + if tools: + self._convert_tools_to_responses(tools) + self._convert_tool_choice_to_responses(kwargs.get("tool_choice", "auto")) + except ValueError: + return False + return True + + def _convert_messages_to_responses_input( + self, + messages: List[Dict[str, Any]], + ) -> List[Dict[str, Any]]: + input_items: List[Dict[str, Any]] = [] + + for message in messages: + role = str(message.get("role") or "") + content = message.get("content") + tool_calls = message.get("tool_calls") or [] + + if role == "tool": + tool_call_id = message.get("tool_call_id") + if not tool_call_id: + raise ValueError("Responses API requires tool_call_id for tool outputs") + input_items.append( + { + "type": "function_call_output", + "call_id": tool_call_id, + "output": self._stringify_responses_content(content), + } + ) + continue + + if content is not None: + if not isinstance(content, str): + raise ValueError("Responses reasoning path currently supports text-only messages") + input_items.append( + { + "type": "message", + "role": role, + "content": content, + } + ) + + if role == "assistant" and tool_calls: + for tool_call in tool_calls: + function_payload = tool_call.get("function") or {} + input_items.append( + { + "type": "function_call", + "call_id": tool_call.get("id"), + "name": function_payload.get("name") or "unknown", + "arguments": function_payload.get("arguments") or "{}", + } + ) + + return input_items + + @staticmethod + def _convert_tools_to_responses(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + converted: List[Dict[str, Any]] = [] + for tool in tools: + if tool.get("type") != "function": + raise ValueError("Responses reasoning path currently supports function tools only") + function_payload = tool.get("function") or {} + converted.append( + { + "type": "function", + "name": 
function_payload.get("name") or "unknown", + "description": function_payload.get("description"), + "parameters": function_payload.get("parameters") or {"type": "object", "properties": {}}, + } + ) + return converted + + @staticmethod + def _convert_tool_choice_to_responses(tool_choice: Any) -> Any: + if tool_choice in {None, "auto", "required", "none"}: + return tool_choice or "auto" + if isinstance(tool_choice, dict): + if tool_choice.get("type") == "function": + function_payload = tool_choice.get("function") or {} + name = function_payload.get("name") or tool_choice.get("name") + if name: + return {"type": "function", "name": name} + choice_type = tool_choice.get("type") + if choice_type in {"auto", "required", "none"}: + return choice_type + return tool_choice + + def _build_responses_reasoning(self, kwargs: Dict[str, Any]) -> Dict[str, Any] | None: + if not (kwargs.get("thinking") or kwargs.get("reasoning_effort")): + return None + + reasoning: Dict[str, Any] = {"summary": "detailed"} + effort = kwargs.get("reasoning_effort") + if effort: + reasoning["effort"] = effort + return reasoning + + def _build_responses_request_kwargs( + self, + messages: List[Message], + kwargs: Dict[str, Any], + *, + tools: List[Dict[str, Any]] | None = None, + stream: bool, + ) -> Dict[str, Any]: + openai_messages = self._convert_messages(messages) + model = kwargs.get("model", self.model) + max_tokens = kwargs.get("max_completion_tokens", kwargs.get("max_tokens", self.max_tokens)) + temperature = kwargs.get("temperature", self.temperature) + + request_kwargs: Dict[str, Any] = { + "model": model, + "input": self._convert_messages_to_responses_input(openai_messages), + "stream": stream, + "max_output_tokens": max_tokens, + } + if self._supports_temperature(model, kwargs.get("reasoning_effort")): + request_kwargs["temperature"] = temperature + + reasoning = self._build_responses_reasoning(kwargs) + if reasoning: + request_kwargs["reasoning"] = reasoning + + if tools: + request_kwargs["tools"] = self._convert_tools_to_responses(tools) + request_kwargs["tool_choice"] = self._convert_tool_choice_to_responses( + kwargs.get("tool_choice", "auto") + ) + + extra_keys = { + "model", + "max_tokens", + "max_completion_tokens", + "temperature", + "thinking", + "reasoning_effort", + "tools", + "tool_choice", + "callbacks", + "output_queue", + } + request_kwargs.update({k: v for k, v in kwargs.items() if k not in extra_keys}) + return request_kwargs + + @staticmethod + def _extract_responses_text(response: Any) -> str: + direct = getattr(response, "output_text", None) + if isinstance(direct, str) and direct: + return direct + + texts: List[str] = [] + for item in getattr(response, "output", []) or []: + if getattr(item, "type", None) != "message": + continue + for content_part in getattr(item, "content", []) or []: + if getattr(content_part, "type", None) == "output_text": + text = getattr(content_part, "text", None) + if text: + texts.append(str(text)) + return "".join(texts) + + @staticmethod + def _extract_responses_reasoning(response: Any) -> str: + parts: List[str] = [] + for item in getattr(response, "output", []) or []: + if getattr(item, "type", None) != "reasoning": + continue + for summary_item in getattr(item, "summary", []) or []: + text = getattr(summary_item, "text", None) + if text: + parts.append(str(text)) + return "\n\n".join(parts) + + @staticmethod + def _extract_responses_tool_calls(response: Any) -> List[ToolCall]: + tool_calls: List[ToolCall] = [] + for item in getattr(response, "output", []) or []: 
+ if getattr(item, "type", None) != "function_call": + continue + tool_calls.append( + ToolCall( + id=getattr(item, "call_id", None) or getattr(item, "id", ""), + type="function", + function=Function( + name=getattr(item, "name", None) or "unknown", + arguments=getattr(item, "arguments", None) or "{}", + ), + ) + ) + return tool_calls + + @staticmethod + def _responses_finish_reason(response: Any, tool_calls: List[ToolCall]) -> str: + if tool_calls: + return "tool_calls" + incomplete = getattr(response, "incomplete_details", None) + reason = getattr(incomplete, "reason", None) + if reason == "max_output_tokens": + return "length" + if reason == "content_filter": + return "content_filter" + return "stop" + + @staticmethod + def _responses_usage(response: Any) -> Dict[str, int] | None: + usage = getattr(response, "usage", None) + if usage is None: + return None + prompt_tokens = getattr(usage, "input_tokens", None) + completion_tokens = getattr(usage, "output_tokens", None) + total_tokens = getattr(usage, "total_tokens", None) + if prompt_tokens is None and completion_tokens is None and total_tokens is None: + return None + return { + "prompt_tokens": prompt_tokens or 0, + "completion_tokens": completion_tokens or 0, + "total_tokens": total_tokens or 0, + } + + def _convert_responses_response(self, response: Any, duration: float) -> LLMResponse: + tool_calls = self._extract_responses_tool_calls(response) + reasoning_text = self._extract_responses_reasoning(response) + finish_reason = self._responses_finish_reason(response, tool_calls) + metadata = { + "response_id": getattr(response, "id", ""), + "created": getattr(response, "created_at", None), + } + if reasoning_text: + metadata["reasoning"] = reasoning_text + + return LLMResponse( + content=self._extract_responses_text(response), + provider=self.get_provider_name(), + model=getattr(response, "model", self.model), + finish_reason=finish_reason, + native_finish_reason=finish_reason, + tool_calls=tool_calls, + usage=self._responses_usage(response), + duration=duration, + metadata=metadata, + ) + + async def chat(self, messages: List[Message], **kwargs) -> LLMResponse: + if not self.client or not self._supports_responses_reasoning(messages, None, kwargs): + return await super().chat(messages, **kwargs) + + start_time = asyncio.get_event_loop().time() + response = await self.client.responses.create( + **self._build_responses_request_kwargs(messages, kwargs, stream=False) + ) + duration = asyncio.get_event_loop().time() - start_time + return self._convert_responses_response(response, duration) + + def _build_responses_stream_chunk( + self, + *, + content: str, + delta: str, + model: str, + finish_reason: str | None, + tool_calls: List[ToolCall], + tool_call_chunks: List[Dict[str, Any]] | None = None, + usage: Dict[str, int] | None = None, + metadata: Dict[str, Any] | None = None, + chunk_index: int = 0, + ) -> LLMResponseChunk: + return LLMResponseChunk( + content=content, + delta=delta, + provider=self.get_provider_name(), + model=model, + finish_reason=finish_reason, + tool_calls=tool_calls, + tool_call_chunks=tool_call_chunks, + usage=usage, + metadata=metadata or {}, + chunk_index=chunk_index, + timestamp=datetime.now().isoformat(), + ) + + async def chat_stream( + self, + messages: List[Message], + callbacks: Optional[List[BaseCallbackHandler]] = None, + **kwargs, + ) -> AsyncIterator[LLMResponseChunk]: + tools = kwargs.get("tools") + if not self.client or not self._supports_responses_reasoning(messages, tools, kwargs): + async for chunk 
in super().chat_stream(messages, callbacks=callbacks, **kwargs): + yield chunk + return + + callback_manager = CallbackManager.from_callbacks(callbacks) + run_id = uuid4() + model = kwargs.get("model", self.model) + start_time = asyncio.get_event_loop().time() + + await callback_manager.on_llm_start( + run_id=run_id, + messages=messages, + model=model, + provider=self.get_provider_name(), + ) + + try: + request_kwargs = self._build_responses_request_kwargs( + messages, + kwargs, + tools=tools, + stream=True, + ) + stream = await self.client.responses.create(**request_kwargs) + + full_content = "" + full_reasoning = "" + chunk_index = 0 + tool_calls: List[ToolCall] = [] + latest_response = None + + async for event in stream: + event_type = getattr(event, "type", None) + if event_type == "response.reasoning_summary_text.delta": + delta = getattr(event, "delta", "") or "" + if not delta: + continue + full_reasoning += delta + yield self._build_responses_stream_chunk( + content=full_reasoning, + delta=delta, + model=model, + finish_reason=None, + tool_calls=tool_calls, + metadata={ + "type": "thinking", + "phase": "think", + "provider": self.get_provider_name(), + "channel": "thinking", + }, + chunk_index=chunk_index, + ) + chunk_index += 1 + continue + + if event_type == "response.output_text.delta": + delta = getattr(event, "delta", "") or "" + if not delta: + continue + full_content += delta + response_chunk = self._build_responses_stream_chunk( + content=full_content, + delta=delta, + model=model, + finish_reason=None, + tool_calls=tool_calls, + metadata={ + "provider": self.get_provider_name(), + "channel": "text", + }, + chunk_index=chunk_index, + ) + await callback_manager.on_llm_new_token( + token=delta, + chunk=response_chunk, + run_id=run_id, + ) + yield response_chunk + chunk_index += 1 + continue + + if event_type == "response.output_item.done": + item = getattr(event, "item", None) + if getattr(item, "type", None) != "function_call": + continue + tool_calls = [ + *tool_calls, + ToolCall( + id=getattr(item, "call_id", None) or getattr(item, "id", ""), + type="function", + function=Function( + name=getattr(item, "name", None) or "unknown", + arguments=getattr(item, "arguments", None) or "{}", + ), + ), + ] + yield self._build_responses_stream_chunk( + content=full_content, + delta="", + model=model, + finish_reason=None, + tool_calls=tool_calls, + tool_call_chunks=[ + { + "index": getattr(event, "output_index", len(tool_calls) - 1), + "id": tool_calls[-1].id, + "type": "function", + "function": { + "name": tool_calls[-1].function.name, + "arguments": tool_calls[-1].function.arguments, + }, + } + ], + metadata={ + "provider": self.get_provider_name(), + "channel": "tool", + }, + chunk_index=chunk_index, + ) + chunk_index += 1 + continue + + if event_type == "response.completed": + latest_response = getattr(event, "response", None) + + if latest_response is None: + raise RuntimeError("OpenAI Responses stream completed without a final response") + + final_content = self._extract_responses_text(latest_response) + if final_content and len(final_content) >= len(full_content): + full_content = final_content + + final_tool_calls = self._extract_responses_tool_calls(latest_response) or tool_calls + finish_reason = self._responses_finish_reason(latest_response, final_tool_calls) + usage = self._responses_usage(latest_response) + final_metadata: Dict[str, Any] = { + "response_id": getattr(latest_response, "id", ""), + "created": getattr(latest_response, "created_at", None), + } + if 
full_reasoning: + final_metadata["reasoning"] = full_reasoning + + final_chunk = self._build_responses_stream_chunk( + content=full_content, + delta="", + model=getattr(latest_response, "model", model), + finish_reason=finish_reason, + tool_calls=final_tool_calls, + usage=usage, + metadata=final_metadata, + chunk_index=chunk_index, + ) + yield final_chunk + + duration = asyncio.get_event_loop().time() - start_time + await callback_manager.on_llm_end( + response=LLMResponse( + content=full_content, + provider=self.get_provider_name(), + model=getattr(latest_response, "model", model), + finish_reason=finish_reason, + native_finish_reason=finish_reason, + tool_calls=final_tool_calls, + usage=usage, + duration=duration, + metadata=final_metadata, + ), + run_id=run_id, + ) + except Exception as e: + await callback_manager.on_llm_error( + error=e, + run_id=run_id, + ) + await self._handle_error(e) + + async def chat_with_tools(self, messages: List[Message], tools: List[Dict], **kwargs) -> LLMResponse: + if not self.client or not self._supports_responses_reasoning(messages, tools, kwargs): + return await super().chat_with_tools(messages, tools, **kwargs) + + start_time = asyncio.get_event_loop().time() + output_queue = kwargs.get("output_queue") + request_kwargs = self._build_responses_request_kwargs( + messages, + kwargs, + tools=tools, + stream=output_queue is not None, + ) + + if output_queue is None: + response = await self.client.responses.create(**request_kwargs) + duration = asyncio.get_event_loop().time() - start_time + return self._convert_responses_response(response, duration) + + stream = await self.client.responses.create(**request_kwargs) + latest_response = None + full_reasoning = "" + + async for event in stream: + event_type = getattr(event, "type", None) + if event_type == "response.reasoning_summary_text.delta": + delta = getattr(event, "delta", "") or "" + if delta: + full_reasoning += delta + try: + output_queue.put_nowait( + build_output_queue_event( + event_type="thinking", + delta=delta, + metadata={ + "phase": "think", + "provider": self.get_provider_name(), + "channel": "thinking", + }, + ) + ) + except Exception: + pass + elif event_type == "response.output_text.delta": + delta = getattr(event, "delta", "") or "" + if delta: + try: + output_queue.put_nowait( + build_output_queue_event( + event_type="content", + delta=delta, + metadata={ + "provider": self.get_provider_name(), + "channel": "text", + }, + ) + ) + except Exception: + pass + elif event_type == "response.completed": + latest_response = getattr(event, "response", None) + + if latest_response is None: + raise RuntimeError("OpenAI Responses stream completed without a final response") + + duration = asyncio.get_event_loop().time() - start_time + result = self._convert_responses_response(latest_response, duration) + if full_reasoning: + result.metadata["reasoning"] = full_reasoning + result.metadata["streamed_content"] = bool(result.content) + result.metadata["stream_chunk_count"] = 0 + return result + def get_metadata(self) -> ProviderMetadata: """Get OpenAI provider metadata.""" return ProviderMetadata( @@ -44,4 +617,4 @@ def get_metadata(self) -> ProviderMetadata: "requests_per_minute": 3500, "tokens_per_minute": 90000 } - ) \ No newline at end of file + ) diff --git a/spoon_ai/schema.py b/spoon_ai/schema.py index c614f64..be581b9 100644 --- a/spoon_ai/schema.py +++ b/spoon_ai/schema.py @@ -52,6 +52,10 @@ class ToolCall(BaseModel): id: str type: str = "function" function: Function + metadata: Optional[dict[str, 
Any]] = Field( + default=None, + description="Optional provider-specific metadata for preserving tool-call context", + ) class AgentState(str, Enum): """ diff --git a/tests/test_agent_llm_integration.py b/tests/test_agent_llm_integration.py index 98e2d3d..00bf39f 100644 --- a/tests/test_agent_llm_integration.py +++ b/tests/test_agent_llm_integration.py @@ -191,6 +191,33 @@ async def test_toolcall_agent_with_tools(self, mock_chatbot_manager, tool_manage tool_input={"param": "value"} ) assert "Tool executed successfully" in result + + @pytest.mark.asyncio + async def test_toolcall_agent_preserves_tool_call_metadata_in_memory(self, mock_chatbot_manager, tool_manager): + agent = ToolCallAgent( + name="test_agent", + llm=mock_chatbot_manager, + available_tools=tool_manager, + ) + + await agent.add_message( + "assistant", + "I'll use a tool.", + tool_calls=[ + ToolCall( + id="call_sig", + type="function", + function=Function( + name="test_tool", + arguments='{"param":"value"}', + ), + metadata={"thought_signature": "c2lnLTEyMw=="}, + ) + ], + ) + + stored = agent.memory.messages[-1].tool_calls[0] + assert stored.metadata == {"thought_signature": "c2lnLTEyMw=="} @pytest.mark.asyncio async def test_spoon_react_ai_initialization(self): @@ -206,9 +233,11 @@ async def test_spoon_react_ai_initialization(self): # Verify it uses the new architecture assert agent.llm.use_llm_manager is True - def test_spoon_react_run_signatures_accept_thinking(self): + def test_spoon_react_run_signatures_accept_reasoning_kwargs(self): assert "thinking" in inspect.signature(SpoonReactAI.run).parameters assert "thinking" in inspect.signature(SpoonReactSkill.run).parameters + assert "reasoning_effort" in inspect.signature(SpoonReactAI.run).parameters + assert "reasoning_effort" in inspect.signature(SpoonReactSkill.run).parameters @pytest.mark.asyncio async def test_spoon_react_ai_fallback_to_legacy(self): @@ -328,7 +357,7 @@ async def test_streamed_tool_response_does_not_enqueue_full_content_twice(self, assert all(call.args != ({"content": "already streamed full text"},) for call in put_calls) @pytest.mark.asyncio - async def test_toolcall_agent_emits_explicit_thinking_for_non_streamed_pre_tool_content(self, mock_chatbot_manager, tool_manager): + async def test_toolcall_agent_emits_progress_content_for_non_streamed_pre_tool_content(self, mock_chatbot_manager, tool_manager): mock_tool_call = ToolCall( id="call_123", type="function", @@ -361,11 +390,11 @@ async def test_toolcall_agent_emits_explicit_thinking_for_non_streamed_pre_tool_ assert should_continue is True put_calls = mock_queue.put_nowait.call_args_list assert put_calls[0].args[0] == { - "type": "thinking", + "type": "content", "delta": "First I will inspect the workspace.", "content": "First I will inspect the workspace.", "metadata": { - "phase": "think", + "phase": "progress", "source": "toolcall_agent", }, } diff --git a/tests/test_tool_streaming_output.py b/tests/test_tool_streaming_output.py index 1b78df9..496f20b 100644 --- a/tests/test_tool_streaming_output.py +++ b/tests/test_tool_streaming_output.py @@ -16,6 +16,7 @@ from spoon_ai.llm.providers.gemini_provider import GeminiProvider from spoon_ai.llm.providers.ollama_provider import OllamaProvider from spoon_ai.llm.providers.openai_compatible_provider import OpenAICompatibleProvider +from spoon_ai.llm.providers.openai_provider import OpenAIProvider from spoon_ai.llm.providers.openrouter_provider import OpenRouterProvider @@ -187,6 +188,34 @@ async def 
test_chatbot_ask_tool_normalizes_anthropic_reasoning_effort(): assert call.kwargs["output_config"] == {"effort": "high"} +@pytest.mark.asyncio +async def test_chatbot_ask_tool_enables_anthropic_adaptive_thinking_from_reasoning_effort_alone(): + mock_manager = SimpleNamespace(chat_with_tools=AsyncMock()) + mock_manager.chat_with_tools.return_value = LLMResponse( + content="ok", + provider="anthropic", + model="claude-sonnet-4.6", + finish_reason="stop", + native_finish_reason="stop", + ) + + with patch("spoon_ai.chat.get_llm_manager", return_value=mock_manager): + bot = ChatBot( + use_llm_manager=True, + llm_provider="anthropic", + model_name="claude-sonnet-4.6", + ) + await bot.ask_tool( + [{"role": "user", "content": "hi"}], + tools=_tool_spec(), + reasoning_effort="high", + ) + + call = mock_manager.chat_with_tools.call_args + assert call.kwargs["thinking"] == {"type": "adaptive"} + assert call.kwargs["output_config"] == {"effort": "high"} + + @pytest.mark.asyncio async def test_chatbot_ask_tool_maps_anthropic_xhigh_to_max_for_opus_46(): mock_manager = SimpleNamespace(chat_with_tools=AsyncMock()) @@ -358,6 +387,252 @@ async def test_openai_chat_with_tools_streams_deltas_to_output_queue(): assert response.metadata.get("streamed_content") is True +def test_openai_supports_temperature_for_gpt_54_only_without_reasoning(): + provider = OpenAICompatibleProvider() + + assert provider._supports_temperature("gpt-5.4", reasoning_effort="none") is True + assert provider._supports_temperature("gpt-5.4", reasoning_effort=None) is True + assert provider._supports_temperature("gpt-5.4", reasoning_effort="high") is False + assert provider._supports_temperature("gpt-5.4", reasoning_effort="medium") is False + + +@pytest.mark.asyncio +async def test_openai_chat_with_tools_uses_responses_reasoning_summary_when_effort_requested(): + provider = OpenAIProvider() + provider.model = "gpt-5.4" + + completed_response = SimpleNamespace( + id="resp_123", + created_at=123.0, + model="gpt-5.4", + output=[ + SimpleNamespace( + type="reasoning", + summary=[ + SimpleNamespace( + text="Plan: inspect the latest game state before choosing a move." 
+ ) + ], + ), + SimpleNamespace( + type="function_call", + id="fc_123", + call_id="call_123", + name="echo_tool", + arguments='{"text":"hello"}', + ), + ], + usage=SimpleNamespace( + input_tokens=10, + output_tokens=7, + total_tokens=17, + ), + ) + stream_items = [ + SimpleNamespace( + type="response.reasoning_summary_text.delta", + delta="Plan: inspect the latest game state before choosing a move.", + output_index=0, + summary_index=0, + item_id="rs_123", + ), + SimpleNamespace( + type="response.output_item.done", + output_index=1, + item=SimpleNamespace( + type="function_call", + id="fc_123", + call_id="call_123", + name="echo_tool", + arguments='{"text":"hello"}', + ), + ), + SimpleNamespace( + type="response.completed", + response=completed_response, + ), + ] + provider.client = SimpleNamespace( + responses=SimpleNamespace(create=AsyncMock(return_value=_AsyncItems(stream_items))), + chat=SimpleNamespace( + completions=SimpleNamespace(create=AsyncMock()), + ), + ) + + q: asyncio.Queue = asyncio.Queue() + response = await provider.chat_with_tools( + messages=[Message(role="user", content="hi")], + tools=_tool_spec(), + output_queue=q, + reasoning_effort="high", + ) + + streamed_events: list[dict] = [] + while not q.empty(): + streamed_events.append(await q.get()) + + assert streamed_events == [ + { + "type": "thinking", + "delta": "Plan: inspect the latest game state before choosing a move.", + "content": "Plan: inspect the latest game state before choosing a move.", + "metadata": { + "phase": "think", + "provider": "openai", + "channel": "thinking", + }, + } + ] + assert response.content == "" + assert response.tool_calls[0].id == "call_123" + assert response.tool_calls[0].function.name == "echo_tool" + assert response.tool_calls[0].function.arguments == '{"text":"hello"}' + assert response.metadata["reasoning"] == ( + "Plan: inspect the latest game state before choosing a move." + ) + assert provider.client.responses.create.await_count == 1 + assert provider.client.chat.completions.create.await_count == 0 + request_kwargs = provider.client.responses.create.await_args.kwargs + assert request_kwargs["reasoning"] == {"effort": "high", "summary": "detailed"} + assert "temperature" not in request_kwargs + assert request_kwargs["tools"] == [ + { + "type": "function", + "name": "echo_tool", + "description": "Echo text", + "parameters": { + "type": "object", + "properties": {"text": {"type": "string"}}, + "required": ["text"], + }, + } + ] + + +@pytest.mark.asyncio +async def test_openai_chat_stream_uses_responses_reasoning_summary_when_effort_requested(): + provider = OpenAIProvider() + provider.model = "gpt-5.4" + + completed_response = SimpleNamespace( + id="resp_stream_123", + created_at=456.0, + model="gpt-5.4", + output=[ + SimpleNamespace( + type="reasoning", + summary=[ + SimpleNamespace( + text="Plan: inspect the wallet before attempting to join." 
+ ) + ], + ), + SimpleNamespace( + type="function_call", + id="fc_stream_123", + call_id="call_stream_123", + name="echo_tool", + arguments='{"text":"hello"}', + ), + ], + usage=SimpleNamespace( + input_tokens=12, + output_tokens=8, + total_tokens=20, + ), + ) + stream_items = [ + SimpleNamespace( + type="response.reasoning_summary_text.delta", + delta="Plan: inspect the wallet before attempting to join.", + output_index=0, + summary_index=0, + item_id="rs_stream_123", + ), + SimpleNamespace( + type="response.output_text.delta", + delta="Wallet looks ready.", + ), + SimpleNamespace( + type="response.output_item.done", + output_index=1, + item=SimpleNamespace( + type="function_call", + id="fc_stream_123", + call_id="call_stream_123", + name="echo_tool", + arguments='{"text":"hello"}', + ), + ), + SimpleNamespace( + type="response.completed", + response=completed_response, + ), + ] + provider.client = SimpleNamespace( + responses=SimpleNamespace(create=AsyncMock(return_value=_AsyncItems(stream_items))), + chat=SimpleNamespace( + completions=SimpleNamespace(create=AsyncMock()), + ), + ) + + chunks = [ + chunk + async for chunk in provider.chat_stream( + messages=[Message(role="user", content="hi")], + tools=_tool_spec(), + reasoning_effort="high", + ) + ] + + assert chunks[0].delta == "Plan: inspect the wallet before attempting to join." + assert chunks[0].metadata == { + "type": "thinking", + "phase": "think", + "provider": "openai", + "channel": "thinking", + } + assert chunks[1].delta == "Wallet looks ready." + assert chunks[1].metadata == { + "provider": "openai", + "channel": "text", + } + assert chunks[2].tool_calls[0].id == "call_stream_123" + assert chunks[2].tool_calls[0].function.name == "echo_tool" + assert chunks[2].tool_call_chunks == [ + { + "index": 1, + "id": "call_stream_123", + "type": "function", + "function": { + "name": "echo_tool", + "arguments": '{"text":"hello"}', + }, + } + ] + assert chunks[-1].finish_reason == "tool_calls" + assert chunks[-1].metadata["reasoning"] == ( + "Plan: inspect the wallet before attempting to join." 
+ ) + assert provider.client.responses.create.await_count == 1 + assert provider.client.chat.completions.create.await_count == 0 + request_kwargs = provider.client.responses.create.await_args.kwargs + assert request_kwargs["reasoning"] == {"effort": "high", "summary": "detailed"} + assert "temperature" not in request_kwargs + assert request_kwargs["tools"] == [ + { + "type": "function", + "name": "echo_tool", + "description": "Echo text", + "parameters": { + "type": "object", + "properties": {"text": {"type": "string"}}, + "required": ["text"], + }, + } + ] + + @pytest.mark.asyncio async def test_openrouter_chat_with_tools_streams_reasoning_to_output_queue(): provider = OpenRouterProvider() @@ -478,6 +753,43 @@ async def test_openrouter_chat_with_tools_maps_reasoning_effort_to_extra_body(): assert provider.client.chat.completions.create.call_args.kwargs["extra_body"]["reasoning"] == { "effort": "high" } + assert "reasoning_effort" not in provider.client.chat.completions.create.call_args.kwargs + + +@pytest.mark.asyncio +async def test_openai_compatible_chat_with_tools_drops_top_level_reasoning_effort_for_non_openrouter_requests(): + provider = OpenAICompatibleProvider() + provider.model = "gpt-4.1" + provider.client = SimpleNamespace( + chat=SimpleNamespace( + completions=SimpleNamespace( + create=AsyncMock( + return_value=SimpleNamespace( + id="resp_456", + choices=[ + SimpleNamespace( + message=SimpleNamespace(content="done", tool_calls=None), + finish_reason="stop", + ) + ], + created=456, + usage=None, + model="gpt-4.1", + ) + ) + ) + ) + ) + + await provider.chat_with_tools( + messages=[Message(role="user", content="hi")], + tools=_tool_spec(), + reasoning_effort="high", + ) + + request_kwargs = provider.client.chat.completions.create.call_args.kwargs + assert "reasoning_effort" not in request_kwargs + assert "extra_body" not in request_kwargs @pytest.mark.asyncio @@ -664,6 +976,44 @@ def _stream(**request_kwargs): assert response.content == "ok" +@pytest.mark.asyncio +async def test_anthropic_chat_with_tools_strips_temperature_top_k_and_forced_tool_choice_when_thinking_enabled(): + provider = AnthropicProvider() + provider.model = "claude-opus-4.7" + + captured_kwargs: dict = {} + chunks = [ + SimpleNamespace(type="content_block_start", content_block=SimpleNamespace(type="text")), + SimpleNamespace(type="content_block_delta", delta=SimpleNamespace(type="text_delta", text="ok")), + SimpleNamespace(type="content_block_stop"), + SimpleNamespace(type="message_delta", delta=SimpleNamespace(stop_reason="end_turn")), + SimpleNamespace(type="message_stop", message=SimpleNamespace(stop_reason="end_turn")), + ] + + def _stream(**request_kwargs): + captured_kwargs.update(request_kwargs) + return _AsyncStreamContext(chunks) + + provider.client = SimpleNamespace( + messages=SimpleNamespace(stream=_stream) + ) + + response = await provider.chat_with_tools( + messages=[Message(role="user", content="hi")], + tools=_tool_spec(), + thinking=True, + temperature=0.7, + top_k=5, + tool_choice="required", + ) + + assert captured_kwargs["thinking"] == {"type": "adaptive"} + assert "temperature" not in captured_kwargs + assert "top_k" not in captured_kwargs + assert "tool_choice" not in captured_kwargs + assert response.content == "ok" + + @pytest.mark.asyncio async def test_gemini_chat_with_tools_streams_deltas_to_output_queue(): provider = GeminiProvider() @@ -720,6 +1070,87 @@ def close(self): assert response.metadata.get("streamed_content") is True +@pytest.mark.asyncio +async def 
+    provider = GeminiProvider()
+    provider.api_key = "test-key"
+    provider.model = "gemini-2.5-pro"
+
+    signature = b"sig-123"
+    responses = [
+        SimpleNamespace(
+            candidates=[
+                SimpleNamespace(
+                    content=SimpleNamespace(
+                        parts=[
+                            SimpleNamespace(text="Plan: inspect files.", thought=True),
+                            SimpleNamespace(
+                                function_call=SimpleNamespace(
+                                    name="get_weather",
+                                    args={"city": "Paris"},
+                                ),
+                                thought_signature=signature,
+                            ),
+                        ]
+                    ),
+                    finish_reason="STOP",
+                )
+            ]
+        ),
+    ]
+
+    class _FakeGeminiClient:
+        captured_config = None
+
+        def __init__(self, api_key: str):
+            self.aio = SimpleNamespace(
+                models=SimpleNamespace(
+                    generate_content_stream=AsyncMock(side_effect=self._generate_content_stream)
+                ),
+                aclose=AsyncMock(return_value=None),
+            )
+
+        async def _generate_content_stream(self, *, config, **kwargs):
+            type(self).captured_config = config
+            return _AsyncItems(responses)
+
+        def close(self):
+            return None
+
+    with patch("spoon_ai.llm.providers.gemini_provider.genai.Client", _FakeGeminiClient):
+        q: asyncio.Queue = asyncio.Queue()
+        response = await provider.chat_with_tools(
+            messages=[Message(role="user", content="hi")],
+            tools=_tool_spec(),
+            output_queue=q,
+            thinking_budget=128,
+        )
+
+    streamed_events: list[dict] = []
+    while not q.empty():
+        streamed_events.append(await q.get())
+
+    assert streamed_events == [
+        {
+            "type": "thinking",
+            "delta": "Plan: inspect files.",
+            "content": "Plan: inspect files.",
+            "metadata": {
+                "phase": "think",
+                "provider": "gemini",
+                "channel": "thinking",
+            },
+        }
+    ]
+    assert _FakeGeminiClient.captured_config.thinking_config.include_thoughts is True
+    assert response.finish_reason == "tool_calls"
+    assert response.metadata["reasoning"] == "Plan: inspect files."
+    assert response.tool_calls[0].function.name == "get_weather"
+    assert response.tool_calls[0].metadata == {
+        "thought_signature": "c2lnLTEyMw=="
+    }
+
+
 @pytest.mark.asyncio
 async def test_gemini_chat_with_tools_emits_first_chunk_before_completion():
     provider = GeminiProvider()
@@ -784,6 +1215,36 @@ def close(self):
     assert response.metadata.get("streamed_content") is True


+def test_gemini_convert_messages_for_tools_reuses_thought_signature():
+    provider = GeminiProvider()
+
+    system_content, gemini_messages = provider._convert_messages_for_tools(
+        [
+            Message(
+                role="assistant",
+                content="Working on it.",
+                tool_calls=[
+                    {
+                        "id": "call_123",
+                        "type": "function",
+                        "function": {
+                            "name": "get_weather",
+                            "arguments": '{"city":"Paris"}',
+                        },
+                        "metadata": {
+                            "thought_signature": "c2lnLTEyMw==",
+                        },
+                    }
+                ],
+            )
+        ]
+    )
+
+    assert system_content == ""
+    assert gemini_messages[0].parts[1].function_call.name == "get_weather"
+    assert gemini_messages[0].parts[1].thought_signature == b"sig-123"
+
+
 @pytest.mark.asyncio
 async def test_gemini_chat_stream_yields_incrementally():
     provider = GeminiProvider()
@@ -835,6 +1296,65 @@ def close(self):
     assert second.finish_reason == "stop"


+@pytest.mark.asyncio
+async def test_gemini_chat_stream_yields_thinking_before_visible_content():
+    provider = GeminiProvider()
+    provider.api_key = "test-key"
+    provider.model = "gemini-2.5-pro"
+
+    responses = [
+        SimpleNamespace(
+            candidates=[
+                SimpleNamespace(
+                    content=SimpleNamespace(
+                        parts=[
+                            SimpleNamespace(text="Plan: inspect.", thought=True),
+                            SimpleNamespace(text="Done."),
+                        ]
+                    ),
+                    finish_reason="STOP",
+                )
+            ],
+            usage_metadata=None,
+        ),
+    ]
+
+    class _FakeGeminiClient:
+        def __init__(self, api_key: str):
+            self.aio = SimpleNamespace(
+                models=SimpleNamespace(
+                    generate_content_stream=AsyncMock(return_value=_AsyncItems(responses))
+                ),
+                aclose=AsyncMock(return_value=None),
+            )
+
+        def close(self):
+            return None
+
+    with patch("spoon_ai.llm.providers.gemini_provider.genai.Client", _FakeGeminiClient):
+        chunks = [
+            chunk
+            async for chunk in provider.chat_stream(
+                messages=[Message(role="user", content="hi")],
+                thinking_budget=128,
+            )
+        ]
+
+    assert chunks[0].delta == "Plan: inspect."
+    assert chunks[0].metadata == {
+        "chunk_index": 0,
+        "type": "thinking",
+        "phase": "think",
+        "provider": "gemini",
+        "channel": "thinking",
+    }
+    assert chunks[1].delta == "Done."
+    assert chunks[1].metadata == {
+        "chunk_index": 1,
+        "finish_reason": "stop",
+    }
+
+
 @pytest.mark.asyncio
 async def test_gemini_chat_with_tools_keeps_tool_calls_finish_reason_when_native_is_stop():
     provider = GeminiProvider()