Merge pull request #98 from EnzymeML/fix-numpy-dep

JR-1991 · web-flow · commit cc113aaec7bf · 2026-01-15T01:25:20.000+01:00
Fix `numpy>2.0` dependency and update fetchers
diff --git a/.github/workflows/remote-tests.yaml b/.github/workflows/remote-tests.yaml
@@ -9,7 +9,7 @@ jobs:
       max-parallel: 4
       fail-fast: false
       matrix:
-        python-version: ['3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.11', '3.12', '3.13']
 
     steps:
       - name: Checkout
diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml
@@ -9,7 +9,7 @@ jobs:
       max-parallel: 4
       fail-fast: false
       matrix:
-        python-version: ['3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.11', '3.12', '3.13']
 
     steps:
       - name: Checkout
diff --git a/pyenzyme/fetcher/chebi.py b/pyenzyme/fetcher/chebi.py
@@ -6,10 +6,10 @@
 """
 
 import re
-from typing import Dict, List, Optional
+from typing import List, Optional
 
 import httpx
-from pydantic import BaseModel, ConfigDict, RootModel
+from pydantic import BaseModel, ConfigDict, Field
 
 from pyenzyme.versions import v2
 
@@ -24,73 +24,61 @@ def __init__(self, message: str, cause: Optional[Exception] = None):
         self.cause = cause
 
 
-class ChEBIStructure(BaseModel):
-    """Chemical structure information."""
-
-    model_config = ConfigDict(extra="ignore")
-
-    smiles: Optional[str] = None
-    standard_inchi: Optional[str] = None
-    standard_inchi_key: Optional[str] = None
-
-
-class ChEBIEntryData(BaseModel):
-    """Core data structure for a ChEBI entry."""
+class ChebiSearchSource(BaseModel):
+    """Source data structure from ChEBI search API result."""
 
     model_config = ConfigDict(extra="ignore")
 
+    chebi_accession: str
+    name: Optional[str] = None
     ascii_name: str
-    default_structure: Optional[ChEBIStructure] = None
-
-
-class ChEBIEntryResult(BaseModel):
-    """Individual ChEBI entry result."""
-
-    model_config = ConfigDict(extra="ignore")
-
-    standardized_chebi_id: str
-    data: ChEBIEntryData
-
-
-class ChEBIApiResponse(RootModel[Dict[str, ChEBIEntryResult]]):
-    """Top-level response structure from ChEBI API. Maps ChEBI IDs to their corresponding entry data."""
-
-    root: Dict[str, ChEBIEntryResult]
+    smiles: Optional[str] = None
+    inchi: Optional[str] = None
+    inchikey: Optional[str] = None
+    definition: Optional[str] = None
+    formula: Optional[str] = None
+    charge: Optional[int] = None
+    mass: Optional[float] = None
+    monoisotopicmass: Optional[float] = None
+    stars: Optional[int] = None
+    default_structure: Optional[int] = None
+    structures: Optional[List[int]] = None
 
 
 class ChebiSearchResult(BaseModel):
     """Individual search result structure."""
 
-    model_config = ConfigDict(extra="ignore")
+    model_config = ConfigDict(extra="ignore", populate_by_name=True)
 
-    _source: Dict[str, str]  # Contains chebi_accession field
+    source: ChebiSearchSource = Field(alias="_source")
 
 
 class ChebiSearchResponse(BaseModel):
     """Search response structure from ChEBI search API."""
 
     results: List[ChebiSearchResult]
+    total: int
+    number_pages: int
 
 
 class ChEBIClient:
     """Client for accessing the ChEBI API to fetch chemical entity data."""
 
-    BASE_URL = "https://www.ebi.ac.uk/chebi/backend/api/public/compounds/"
     SEARCH_URL = "https://www.ebi.ac.uk/chebi/backend/api/public/es_search/"
 
     def __init__(self):
         """Initialize the ChEBI client."""
         pass
 
-    def get_entry_by_id(self, chebi_id: str) -> ChEBIEntryResult:
+    def get_entry_by_id(self, chebi_id: str) -> ChebiSearchSource:
         """
-        Fetch a ChEBI entry by its ID.
+        Fetch a ChEBI entry by its ID using the search API.
 
         Args:
             chebi_id: The ChEBI ID to fetch, can be with or without the 'CHEBI:' prefix
 
         Returns:
-            ChEBIEntryResult object with the parsed response data
+            ChebiSearchSource object with the parsed response data
 
         Raises:
             ChEBIError: If the ChEBI ID is invalid or not found
@@ -101,21 +89,18 @@ def get_entry_by_id(self, chebi_id: str) -> ChEBIEntryResult:
 
         try:
             with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
-                url = self.BASE_URL.format(chebi_id)
-                response = client.get(url)
+                params = {"term": chebi_id, "page": "1", "size": "1"}
+                response = client.get(self.SEARCH_URL, params=params)
                 response.raise_for_status()
 
             if response.status_code == 200:
                 try:
-                    raw_response_data = response.json()
+                    search_response = ChebiSearchResponse(**response.json())
 
-                    if not raw_response_data or len(raw_response_data) == 0:
+                    if not search_response.results or len(search_response.results) == 0:
                         raise ChEBIError(f"No data found for ChEBI ID {chebi_id}")
 
-                    chebi_response = ChEBIApiResponse(raw_response_data)
-
-                    entry = list(chebi_response.root.values())[0]
-                    return entry
+                    return search_response.results[0].source
 
                 except Exception as e:
                     if isinstance(e, ChEBIError):
@@ -127,15 +112,15 @@ def get_entry_by_id(self, chebi_id: str) -> ChEBIEntryResult:
         except httpx.HTTPStatusError as e:
             raise ChEBIError(f"Failed to fetch ChEBI ID {chebi_id}: {str(e)}", e)
 
-    def get_entries_batch(self, chebi_ids: List[str]) -> List[ChEBIEntryResult]:
+    def get_entries_batch(self, chebi_ids: List[str]) -> List[ChebiSearchSource]:
         """
-        Fetch multiple ChEBI entries by their IDs.
+        Fetch multiple ChEBI entries by their IDs using the search API.
 
         Args:
             chebi_ids: List of ChEBI IDs to fetch
 
         Returns:
-            List of ChEBIEntryResult objects with data from ChEBI
+            List of ChebiSearchSource objects with data from ChEBI
 
         Raises:
             ChEBIError: If any ChEBI ID is invalid or not found
@@ -144,74 +129,51 @@ def get_entries_batch(self, chebi_ids: List[str]) -> List[ChEBIEntryResult]:
         if not chebi_ids:
             return []
 
-        formatted_ids = []
+        results = []
         for chebi_id in chebi_ids:
-            if not chebi_id.startswith("CHEBI:"):
-                formatted_ids.append(f"CHEBI:{chebi_id}")
-            else:
-                formatted_ids.append(chebi_id)
-
-        try:
-            with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
-                url = self.BASE_URL.format(chebi_id)
-                response = client.get(url)
-                response.raise_for_status()
-
-            if response.status_code == 200:
-                try:
-                    raw_response_data = response.json()
-                    chebi_response = ChEBIApiResponse(raw_response_data)
-                    return list(chebi_response.root.values())
-
-                except Exception as e:
-                    raise ChEBIError(
-                        f"Failed to parse ChEBI batch response: {str(e)}", e
-                    )
-            else:
-                raise ChEBIError(f"HTTP {response.status_code}: {response.text}")
+            try:
+                entry = self.get_entry_by_id(chebi_id)
+                results.append(entry)
+            except ChEBIError as e:
+                # Fail fast: abort the batch and re-raise with the failing ID for context
+                raise ChEBIError(f"Failed to fetch ChEBI ID {chebi_id}: {str(e)}", e)
 
-        except httpx.HTTPStatusError as e:
-            raise ChEBIError(f"Failed to fetch ChEBI batch: {str(e)}", e)
+        return results
 
     def search_entries(
-        self, query: str, size: Optional[int] = None
-    ) -> List[ChEBIEntryResult]:
+        self, query: str, size: Optional[int] = None, page: int = 1
+    ) -> List[ChebiSearchSource]:
         """
         Search for ChEBI entries by query string.
 
         Args:
             query: The search query string to find ChEBI entries
             size: The maximum number of search results to return
+            page: The page number to retrieve (default: 1)
 
         Returns:
-            List of ChEBIEntryResult objects for matching entries
+            List of ChebiSearchSource objects for matching entries
 
         Raises:
             ChEBIError: If the search request fails or the API is unavailable
         """
-        params = {"term": query}
+        params = {"term": query, "page": str(page)}
         if size:
             params["size"] = str(size)
 
         try:
             with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
-                url = self.SEARCH_URL
-                response = client.get(url, params=params)
+                response = client.get(self.SEARCH_URL, params=params)
             response.raise_for_status()
 
             if response.status_code == 200:
                 try:
-                    search_results = ChebiSearchResponse(**response.json())
+                    search_response = ChebiSearchResponse(**response.json())
 
-                    if not search_results.results:
+                    if not search_response.results:
                         return []
 
-                    chebi_ids = [
-                        result._source["chebi_accession"]
-                        for result in search_results.results
-                    ]
-
-                    return self.get_entries_batch(chebi_ids)
+                    return [result.source for result in search_response.results]
 
                 except Exception as e:
                     if isinstance(e, ChEBIError):
@@ -226,44 +188,39 @@ def search_entries(
             raise ChEBIError(f"Failed to search ChEBI: {str(e)}", e)
 
 
-def process_chebi_entry(entry: ChEBIEntryResult) -> v2.SmallMolecule:
+def process_search_result(source: ChebiSearchSource) -> v2.SmallMolecule:
     """
-    Process a ChEBI entry result and convert it to a SmallMolecule object.
+    Process a ChEBI search result source and convert it to a SmallMolecule object.
 
     Args:
-        entry: The ChEBI entry result from the API
+        source: The ChEBI search result source from the API
 
     Returns:
         A SmallMolecule object with mapped data
     """
-    smallmol_id = process_id(entry.data.ascii_name)
-
-    structure = entry.data.default_structure
-    canonical_smiles = structure.smiles if structure else None
-    inchi = structure.standard_inchi if structure else None
-    inchikey = structure.standard_inchi_key if structure else None
+    smallmol_id = process_id(source.ascii_name)
 
     small_molecule = v2.SmallMolecule(
         id=smallmol_id,
-        name=entry.data.ascii_name,
-        canonical_smiles=canonical_smiles,
-        inchi=inchi,
-        inchikey=inchikey,
+        name=source.ascii_name,
+        canonical_smiles=source.smiles,
+        inchi=source.inchi,
+        inchikey=source.inchikey,
         constant=False,
         vessel_id=None,
         synonymous_names=[],
         references=[
-            f"https://www.ebi.ac.uk/chebi/searchId.do?chebiId={entry.standardized_chebi_id}"
+            f"https://www.ebi.ac.uk/chebi/searchId.do?chebiId={source.chebi_accession}"
         ],
     )
 
     small_molecule.add_type_term(
-        term=f"OBO:{entry.standardized_chebi_id.replace(':', '_')}",
+        term=f"OBO:{source.chebi_accession.replace(':', '_')}",
         prefix="OBO",
         iri="http://purl.obolibrary.org/obo/",
     )
 
-    small_molecule.ld_id = f"OBO:{entry.standardized_chebi_id.replace(':', '_')}"
+    small_molecule.ld_id = f"OBO:{source.chebi_accession.replace(':', '_')}"
 
     return small_molecule
 
@@ -290,9 +247,9 @@ def fetch_chebi(
     """
     try:
         client = ChEBIClient()
-        chebi_entry = client.get_entry_by_id(chebi_id)
+        chebi_source = client.get_entry_by_id(chebi_id)
 
-        small_molecule = process_chebi_entry(chebi_entry)
+        small_molecule = process_search_result(chebi_source)
 
         if smallmol_id is not None:
             small_molecule.id = smallmol_id
@@ -325,9 +282,9 @@ def fetch_chebi_batch(chebi_ids: List[str]) -> List[v2.SmallMolecule]:
         return []
 
     client = ChEBIClient()
-    chebi_entries = client.get_entries_batch(chebi_ids)
+    chebi_sources = client.get_entries_batch(chebi_ids)
 
-    return [process_chebi_entry(entry) for entry in chebi_entries]
+    return [process_search_result(source) for source in chebi_sources]
 
 
 def search_chebi(query: str, size: Optional[int] = None) -> List[v2.SmallMolecule]:
@@ -355,9 +312,9 @@ def search_chebi(query: str, size: Optional[int] = None) -> List[v2.SmallMolecul
         atp_results = search_chebi('ATP', 5)
     """
     client = ChEBIClient()
-    chebi_entries = client.search_entries(query, size)
+    chebi_sources = client.search_entries(query, size)
 
-    return [process_chebi_entry(entry) for entry in chebi_entries]
+    return [process_search_result(source) for source in chebi_sources]
 
 
 def process_id(name: str) -> str:
diff --git a/pyenzyme/fetcher/pubchem.py b/pyenzyme/fetcher/pubchem.py
@@ -98,9 +98,6 @@ def from_cid(cid: int) -> PubChemQuery:
             response = client.get(url)
             response.raise_for_status()
 
-        if response.status_code != 200:
-            raise ValueError(f"Failed to fetch PubChem data for CID {cid}")
-
         return PubChemQuery(**response.json())
 
     @staticmethod
diff --git a/pyproject.toml b/pyproject.toml
@@ -3,7 +3,7 @@ name = "pyenzyme"
 version = "2.2.1"
 description = "A Python library for EnzymeML"
 authors = [{ name = "Jan Range", email = "range.jan@web.de" }]
-requires-python = ">=3.10,<4"
+requires-python = ">=3.11,<4"
 readme = "README.md"
 license = { text = "BSD-2-Clause" }
 dependencies = [
@@ -17,7 +17,7 @@ dependencies = [
     "fastobo>=0.13.0,<0.14",
     "pymetadata>=0.5.3,<0.6",
     "httpx>=0.27",
-    "mdmodels>=0.2.1,<0.3",
+    "mdmodels>=0.2.4,<0.3",
     "joblib>=1.5.0,<2",
     "bokeh>=3.7.3,<4",
     "matplotlib>=3.10,<4",
diff --git a/tests/integration/test_fetcher.py b/tests/integration/test_fetcher.py
@@ -1,3 +1,4 @@
+import httpx
 import pytest
 
 from pyenzyme.fetcher.chebi import fetch_chebi
@@ -155,7 +156,7 @@ def test_fetch_pubchem_to_small_molecule_with_prefix(self):
 
     @pytest.mark.remote
     def test_fetch_pubchem_to_small_molecule_invalid_id(self):
-        with pytest.raises(ValueError):
+        with pytest.raises(httpx.HTTPStatusError):
             fetch_pubchem(cid="162176127617627")
 
     @pytest.mark.remote
diff --git a/tests/integration/test_legacy.py b/tests/integration/test_legacy.py
diff --git a/uv.lock b/uv.lock