diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 1378a17dbb8b6..81f4a25076fa2 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -11,8 +11,8 @@ from operator import itemgetter from typing import TYPE_CHECKING, Any, Literal, cast -from pydantic import BaseModel, ConfigDict, Field -from typing_extensions import override +from pydantic import BaseModel, ConfigDict, Field, model_validator +from typing_extensions import Self, override from langchain_core.caches import BaseCache from langchain_core.callbacks import ( @@ -32,7 +32,10 @@ LangSmithParams, LanguageModelInput, ) -from langchain_core.language_models.model_profile import ModelProfile +from langchain_core.language_models.model_profile import ( + ModelProfile, + _warn_unknown_profile_keys, +) from langchain_core.load import dumpd, dumps from langchain_core.messages import ( AIMessage, @@ -357,6 +360,46 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC): arbitrary_types_allowed=True, ) + def _resolve_model_profile(self) -> ModelProfile | None: + """Resolve the default model profile for this model. + + Override in subclasses to provide auto-populated profile data. + + Subclasses that override this method do not need to define their own + `_set_model_profile` validator — the base class validator will call this + method automatically. + + Returns: + A `ModelProfile` dict, or `None` if no default profile is available. + """ + return None + + @model_validator(mode="after") + def _set_model_profile(self) -> Self: + """Set model profile if not overridden. + + Subclasses can either: + + - Override `_resolve_model_profile` (recommended) and inherit this + validator, or + - Override this validator directly (existing behavior, replaces this + implementation in Pydantic v2). + """ + if self.profile is None: + self.profile = self._resolve_model_profile() + return self + + @model_validator(mode="after") + def _check_profile_keys(self) -> Self: + """Warn on unrecognized profile keys. + + Uses a distinct method name so that partner subclasses that override + `_set_model_profile` do not inadvertently suppress this check. + """ + if self.profile: + _warn_unknown_profile_keys(self.profile) + return self + @cached_property def _serialized(self) -> dict[str, Any]: # self is always a Serializable object in this case, thus the result is diff --git a/libs/core/langchain_core/language_models/model_profile.py b/libs/core/langchain_core/language_models/model_profile.py index 8e1c6b4e21a1b..6c32e3f41948f 100644 --- a/libs/core/langchain_core/language_models/model_profile.py +++ b/libs/core/langchain_core/language_models/model_profile.py @@ -1,5 +1,10 @@ """Model profile types and utilities.""" +import contextlib +import warnings +from typing import get_type_hints + +from pydantic import ConfigDict from typing_extensions import TypedDict @@ -14,6 +19,25 @@ class ModelProfile(TypedDict, total=False): and supported features. """ + __pydantic_config__ = ConfigDict(extra="allow") # type: ignore[misc] + + # --- Model metadata --- + + name: str + """Human-readable model name.""" + + status: str + """Model status (e.g., `'active'`, `'deprecated'`).""" + + release_date: str + """Model release date (ISO 8601 format, e.g., `'2025-06-01'`).""" + + last_updated: str + """Date the model was last updated (ISO 8601 format).""" + + open_weights: bool + """Whether the model weights are openly available.""" + # --- Input constraints --- max_input_tokens: int @@ -86,6 +110,60 @@ class ModelProfile(TypedDict, total=False): """Whether the model supports a native [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs) feature""" + # --- Other capabilities --- + + attachment: bool + """Whether the model supports file attachments.""" + + temperature: bool + """Whether the model supports a temperature parameter.""" + ModelProfileRegistry = dict[str, ModelProfile] """Registry mapping model identifiers or names to their ModelProfile.""" + + +# Cache for ModelProfile's declared field names. Populated lazily because +# _warn_unknown_profile_keys runs on every chat model construction and +# get_type_hints is not free. +_DECLARED_PROFILE_KEYS: frozenset[str] | None = None + + +def _get_declared_profile_keys() -> frozenset[str]: + """Return the declared `ModelProfile` field names, cached after first call.""" + global _DECLARED_PROFILE_KEYS # noqa: PLW0603 + if _DECLARED_PROFILE_KEYS is None: + _DECLARED_PROFILE_KEYS = frozenset(get_type_hints(ModelProfile).keys()) + return _DECLARED_PROFILE_KEYS + + +def _warn_unknown_profile_keys(profile: ModelProfile) -> None: + """Emit a warning if a profile dict contains keys not declared in `ModelProfile`. + + This function must never raise — it is called during model construction and + a failure here would prevent all chat model instantiation. + + Args: + profile: Model profile dict to check. + """ + try: + declared = _get_declared_profile_keys() + except Exception: + # If introspection fails (e.g. forward ref issues), skip rather than + # crash model construction. + return + + extra = sorted(set(profile) - declared) + if extra: + # warnings.warn() raises when the user (or a test framework like + # pytest) configures warnings-as-errors (-W error / + # warnings.simplefilter("error")). Suppress so we honour the + # "must never raise" contract — this runs during every chat model + # construction. + with contextlib.suppress(Exception): + warnings.warn( + f"Unrecognized keys in model profile: {extra}. " + f"This may indicate a version mismatch between langchain-core " + f"and your provider package. Consider upgrading langchain-core.", + stacklevel=2, + ) diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index f0280b2ce81b3..cf47ec8d54fec 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -6,7 +6,8 @@ from typing import TYPE_CHECKING, Any, Literal import pytest -from typing_extensions import override +from pydantic import model_validator +from typing_extensions import Self, override from langchain_core.callbacks import ( CallbackManagerForLLMRun, @@ -22,6 +23,7 @@ FakeListChatModelError, GenericFakeChatModel, ) +from langchain_core.language_models.model_profile import ModelProfile from langchain_core.messages import ( AIMessage, AIMessageChunk, @@ -1230,6 +1232,109 @@ def test_model_profiles() -> None: assert model_with_profile.profile == {"max_input_tokens": 100} +def test_model_profile_extra_keys_accepted() -> None: + """extra='allow' on ModelProfile means unknown keys don't crash.""" + model = GenericFakeChatModel( + messages=iter([]), + profile={"max_input_tokens": 100, "unknown_future_field": True}, + ) + assert model.profile is not None + assert model.profile.get("unknown_future_field") is True + + +def test_check_profile_keys_warns_on_unknown() -> None: + """_check_profile_keys validator warns for undeclared profile keys.""" + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + GenericFakeChatModel( + messages=iter([]), + profile={"max_input_tokens": 100, "unknown_field": True}, + ) + + profile_warnings = [x for x in w if "Unrecognized keys" in str(x.message)] + assert len(profile_warnings) == 1 + assert "unknown_field" in str(profile_warnings[0].message) + + +def test_check_profile_keys_silent_on_valid() -> None: + """_check_profile_keys validator does not warn for declared keys.""" + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + GenericFakeChatModel( + messages=iter([]), + profile={"max_input_tokens": 100, "tool_calling": True}, + ) + + profile_warnings = [x for x in w if "Unrecognized keys" in str(x.message)] + assert len(profile_warnings) == 0 + + +def test_check_profile_keys_runs_despite_partner_override() -> None: + """Verify _check_profile_keys fires even when _set_model_profile is overridden. + + Uses a distinct validator name so partner overrides do not suppress it. + """ + + class PartnerModel(GenericFakeChatModel): + """Simulates a partner that overrides _set_model_profile.""" + + @model_validator(mode="after") + def _set_model_profile(self) -> Self: + if self.profile is None: + profile: dict[str, Any] = { + "max_input_tokens": 100, + "partner_only_field": True, + } + self.profile = profile # type: ignore[assignment] + return self + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + model = PartnerModel(messages=iter([])) + + assert model.profile is not None + assert model.profile.get("partner_only_field") is True + profile_warnings = [x for x in w if "Unrecognized keys" in str(x.message)] + assert len(profile_warnings) == 1 + assert "partner_only_field" in str(profile_warnings[0].message) + + +def test_resolve_model_profile_auto_populates() -> None: + """Base _set_model_profile validator auto-populates from _resolve_model_profile.""" + + class AutoProfileModel(GenericFakeChatModel): + def _resolve_model_profile(self) -> ModelProfile | None: + return {"max_input_tokens": 42, "tool_calling": True} + + model = AutoProfileModel(messages=iter([])) + assert model.profile is not None + assert model.profile["max_input_tokens"] == 42 + assert model.profile["tool_calling"] is True + + +def test_explicit_profile_not_overwritten_by_resolve() -> None: + """Explicit profile= kwarg takes precedence over _resolve_model_profile.""" + + class AutoProfileModel(GenericFakeChatModel): + def _resolve_model_profile(self) -> ModelProfile | None: + return {"max_input_tokens": 42} + + model = AutoProfileModel(messages=iter([]), profile={"max_input_tokens": 999}) + assert model.profile is not None + assert model.profile["max_input_tokens"] == 999 + + +def test_resolve_model_profile_none_leaves_profile_none() -> None: + """Subclass returning None from _resolve_model_profile leaves profile as None.""" + + class NoProfileModel(GenericFakeChatModel): + def _resolve_model_profile(self) -> ModelProfile | None: + return None + + model = NoProfileModel(messages=iter([])) + assert model.profile is None + + class MockResponse: """Mock response for testing _generate_response_from_error.""" diff --git a/libs/core/tests/unit_tests/language_models/test_model_profile.py b/libs/core/tests/unit_tests/language_models/test_model_profile.py new file mode 100644 index 0000000000000..2f6a94027729c --- /dev/null +++ b/libs/core/tests/unit_tests/language_models/test_model_profile.py @@ -0,0 +1,146 @@ +"""Tests for model profile types and utilities.""" + +import warnings +from typing import Any, ClassVar, get_type_hints +from unittest.mock import patch + +import pytest +from pydantic import BaseModel, ConfigDict, Field + +from langchain_core.language_models.model_profile import ( + ModelProfile, + _warn_unknown_profile_keys, +) + + +def _profile_with_extra(**extra: Any) -> ModelProfile: + """Build a ModelProfile with extra keys (bypasses static type checking).""" + base: dict[str, Any] = {"max_input_tokens": 100} + base.update(extra) + return base # type: ignore[return-value] + + +class TestModelProfileExtraAllow: + """Verify extra='allow' on ModelProfile TypedDict.""" + + def test_accepts_declared_keys(self) -> None: + profile: ModelProfile = {"max_input_tokens": 100, "tool_calling": True} + assert profile["max_input_tokens"] == 100 + + def test_accepts_extra_keys_at_runtime(self) -> None: + profile = _profile_with_extra(unknown_future_field="value") + assert profile["unknown_future_field"] == "value" # type: ignore[typeddict-item] + + def test_extra_keys_survive_pydantic_validation(self) -> None: + """Extra keys pass through even when parent model forbids extras.""" + + class StrictModel(BaseModel): + model_config = ConfigDict(extra="forbid") + profile: ModelProfile | None = Field(default=None) + + m = StrictModel( + profile={ + "max_input_tokens": 100, + "unknown_future_field": True, + } + ) + assert m.profile is not None + assert m.profile.get("unknown_future_field") is True + + +class TestWarnUnknownProfileKeys: + """Tests for _warn_unknown_profile_keys.""" + + def test_warns_on_extra_keys(self) -> None: + profile = _profile_with_extra(future_field=True, another="val") + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + _warn_unknown_profile_keys(profile) + + assert len(w) == 1 + assert "another" in str(w[0].message) + assert "future_field" in str(w[0].message) + assert "upgrading langchain-core" in str(w[0].message) + + def test_silent_on_declared_keys_only(self) -> None: + profile: ModelProfile = {"max_input_tokens": 100, "tool_calling": True} + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + _warn_unknown_profile_keys(profile) + + assert len(w) == 0 + + def test_silent_on_empty_profile(self) -> None: + profile: ModelProfile = {} + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + _warn_unknown_profile_keys(profile) + + assert len(w) == 0 + + def test_survives_get_type_hints_failure(self) -> None: + """Must never crash — falls back to silent skip.""" + profile = _profile_with_extra(extra=True) + with patch( + "langchain_core.language_models.model_profile.get_type_hints", + side_effect=TypeError("broken"), + ): + # Should not raise + _warn_unknown_profile_keys(profile) + + def test_all_current_declared_fields_recognized(self) -> None: + """Sanity check: all declared fields are recognized as declared.""" + hints = get_type_hints(ModelProfile) + profile: dict[str, Any] = {} + for key, typ in hints.items(): + if typ is bool: + profile[key] = True + elif typ is int: + profile[key] = 100 + elif typ is str: + profile[key] = "test" + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + _warn_unknown_profile_keys(profile) # type: ignore[arg-type] + + assert len(w) == 0 + + +class TestModelProfileFields: + """Verify expected fields exist on ModelProfile.""" + + _declared: ClassVar[set[str]] = set(get_type_hints(ModelProfile).keys()) + + @pytest.mark.parametrize( + "field", + [ + "name", + "status", + "release_date", + "last_updated", + "open_weights", + "max_input_tokens", + "max_output_tokens", + "text_inputs", + "image_inputs", + "audio_inputs", + "video_inputs", + "text_outputs", + "image_outputs", + "audio_outputs", + "video_outputs", + "tool_calling", + "tool_choice", + "structured_output", + "attachment", + "temperature", + "image_url_inputs", + "image_tool_message", + "pdf_tool_message", + "pdf_inputs", + "reasoning_output", + ], + ) + def test_field_declared(self, field: str) -> None: + assert field in self._declared diff --git a/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_summarization.py b/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_summarization.py index 91ef582ef4a9e..1ae973c7ad456 100644 --- a/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_summarization.py +++ b/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_summarization.py @@ -388,36 +388,13 @@ def token_counter(messages: Iterable[MessageLikeRepresentation]) -> int: def test_summarization_middleware_missing_profile() -> None: - """Ensure automatic profile inference falls back when profiles are unavailable.""" - - class ImportErrorProfileModel(BaseChatModel): - @override - def _generate( - self, - messages: list[BaseMessage], - stop: list[str] | None = None, - run_manager: CallbackManagerForLLMRun | None = None, - **kwargs: Any, - ) -> ChatResult: - raise NotImplementedError - - @property - def _llm_type(self) -> str: - return "mock" - - # NOTE: Using __getattribute__ because @property cannot override Pydantic fields. - def __getattribute__(self, name: str) -> Any: - if name == "profile": - msg = "Profile not available" - raise AttributeError(msg) - return super().__getattribute__(name) - + """Ensure fractional limits fail when model has no profile data.""" with pytest.raises( ValueError, match="Model profile information is required to use fractional token limits", ): _ = SummarizationMiddleware( - model=ImportErrorProfileModel(), trigger=("fraction", 0.5), keep=("messages", 1) + model=MockChatModel(), trigger=("fraction", 0.5), keep=("messages", 1) ) diff --git a/libs/model-profiles/langchain_model_profiles/cli.py b/libs/model-profiles/langchain_model_profiles/cli.py index 58be483edf09b..e3e866b5a8d42 100644 --- a/libs/model-profiles/langchain_model_profiles/cli.py +++ b/libs/model-profiles/langchain_model_profiles/cli.py @@ -5,6 +5,7 @@ import re import sys import tempfile +import warnings from pathlib import Path from typing import Any @@ -150,6 +151,44 @@ def _apply_overrides( return merged +def _warn_undeclared_profile_keys( + profiles: dict[str, dict[str, Any]], +) -> None: + """Warn if any profile keys are not declared in `ModelProfile`. + + Requires `langchain-core` to be installed. If it is not available the + check is silently skipped (`langchain-core` is a test dependency, not a + runtime dependency of this package). + + Args: + profiles: Mapping of model IDs to their profile dicts. + """ + try: + from langchain_core.language_models.model_profile import ModelProfile + except ImportError: + # langchain-core is a test dep, not a runtime dep; skip check. + return + + from typing import get_type_hints + + declared = set(get_type_hints(ModelProfile).keys()) + + all_keys: set[str] = set() + for profile in profiles.values(): + all_keys.update(profile.keys()) + + extra = sorted(all_keys - declared) + if extra: + warnings.warn( + f"Profile keys not declared in langchain_core ModelProfile: {extra}. " + f"Add these fields to " + f"langchain_core.language_models.model_profile.ModelProfile and " + f"release langchain-core before publishing partner packages that " + f"use these profiles.", + stacklevel=2, + ) + + def _ensure_safe_output_path(base_dir: Path, output_file: Path) -> None: """Ensure the resolved output path remains inside the expected directory.""" if base_dir.exists() and base_dir.is_symlink(): @@ -300,6 +339,9 @@ def refresh(provider: str, data_dir: Path) -> None: # noqa: C901, PLR0915 for model_id in sorted(extra_models): profiles[model_id] = _apply_overrides({}, provider_aug, model_augs[model_id]) + # Warn about profile keys not declared in ModelProfile + _warn_undeclared_profile_keys(profiles) + # Ensure directory exists try: data_dir.mkdir(parents=True, exist_ok=True, mode=0o755) diff --git a/libs/model-profiles/tests/unit_tests/test_cli.py b/libs/model-profiles/tests/unit_tests/test_cli.py index 6c265d37acc59..1cbc773827f06 100644 --- a/libs/model-profiles/tests/unit_tests/test_cli.py +++ b/libs/model-profiles/tests/unit_tests/test_cli.py @@ -2,9 +2,11 @@ import importlib.util from pathlib import Path +from typing import get_type_hints from unittest.mock import Mock, patch import pytest +from langchain_core.language_models.model_profile import ModelProfile from langchain_model_profiles.cli import _model_data_to_profile, refresh @@ -364,3 +366,52 @@ def test_model_data_to_profile_text_modalities() -> None: profile = _model_data_to_profile(image_gen_model) assert profile["text_inputs"] is True assert profile["text_outputs"] is False + + +def test_model_data_to_profile_keys_subset_of_model_profile() -> None: + """Every key emitted by _model_data_to_profile must be declared in ModelProfile. + + If this test fails, a new field was added to `_model_data_to_profile` in the CLI + without a matching field in `langchain_core.language_models.ModelProfile`. Add + the field to `ModelProfile` and release langchain-core BEFORE refreshing partner + profiles. While `ModelProfile` uses `extra='allow'` so Pydantic won't reject + unknown keys at runtime, undeclared fields lack type annotations, won't appear + in IDE autocompletion, and are invisible to static analysis. + """ + # Build a model_data dict with every possible field populated so + # _model_data_to_profile includes all keys it can emit. + model_data = { + "id": "test-model", + "name": "Test Model", + "status": "active", + "release_date": "2025-01-01", + "last_updated": "2025-01-01", + "open_weights": True, + "reasoning": True, + "tool_call": True, + "tool_choice": True, + "structured_output": True, + "attachment": True, + "temperature": True, + "image_url_inputs": True, + "image_tool_message": True, + "pdf_tool_message": True, + "pdf_inputs": True, + "limit": {"context": 100000, "output": 4096}, + "modalities": { + "input": ["text", "image", "audio", "video", "pdf"], + "output": ["text", "image", "audio", "video"], + }, + } + + profile = _model_data_to_profile(model_data) + declared_fields = set(get_type_hints(ModelProfile).keys()) + emitted_fields = set(profile.keys()) + extra = emitted_fields - declared_fields + + assert not extra, ( + f"CLI emits profile keys not declared in ModelProfile: {sorted(extra)}. " + f"Add these fields to langchain_core.language_models.model_profile." + f"ModelProfile and release langchain-core before refreshing partner " + f"profiles." + ) diff --git a/libs/model-profiles/uv.lock b/libs/model-profiles/uv.lock index e9bf34bde89d9..67ac78f40c160 100644 --- a/libs/model-profiles/uv.lock +++ b/libs/model-profiles/uv.lock @@ -459,7 +459,7 @@ wheels = [ [[package]] name = "langchain" -version = "1.2.12" +version = "1.2.13" source = { editable = "../langchain_v1" } dependencies = [ { name = "langchain-core" }, @@ -477,6 +477,7 @@ requires-dist = [ { name = "langchain-anthropic", marker = "extra == 'anthropic'", editable = "../partners/anthropic" }, { name = "langchain-aws", marker = "extra == 'aws'" }, { name = "langchain-azure-ai", marker = "extra == 'azure-ai'" }, + { name = "langchain-baseten", marker = "extra == 'baseten'", specifier = ">=0.2.0" }, { name = "langchain-community", marker = "extra == 'community'" }, { name = "langchain-core", editable = "../core" }, { name = "langchain-deepseek", marker = "extra == 'deepseek'" }, @@ -494,7 +495,7 @@ requires-dist = [ { name = "langgraph", specifier = ">=1.1.1,<1.2.0" }, { name = "pydantic", specifier = ">=2.7.4,<3.0.0" }, ] -provides-extras = ["community", "anthropic", "openai", "azure-ai", "google-vertexai", "google-genai", "fireworks", "ollama", "together", "mistralai", "huggingface", "groq", "aws", "deepseek", "xai", "perplexity"] +provides-extras = ["community", "anthropic", "openai", "azure-ai", "google-vertexai", "google-genai", "fireworks", "ollama", "together", "mistralai", "huggingface", "groq", "aws", "baseten", "deepseek", "xai", "perplexity"] [package.metadata.requires-dev] lint = [{ name = "ruff", specifier = ">=0.15.0,<0.16.0" }]