Skip to content

Commit cc113aa

Browse files
authored
Merge pull request #98 from EnzymeML/fix-numpy-dep
Fix `numpy>2.0` dependency and update fetchers
2 parents 9f8f0d2 + 6be9388 commit cc113aa

File tree

8 files changed

+109
-688
lines changed

8 files changed

+109
-688
lines changed

.github/workflows/remote-tests.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ jobs:
99
max-parallel: 4
1010
fail-fast: false
1111
matrix:
12-
python-version: ['3.10', '3.11', '3.12', '3.13']
12+
python-version: ['3.11', '3.12', '3.13']
1313

1414
steps:
1515
- name: Checkout

.github/workflows/unit-tests.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ jobs:
99
max-parallel: 4
1010
fail-fast: false
1111
matrix:
12-
python-version: ['3.10', '3.11', '3.12', '3.13']
12+
python-version: ['3.11', '3.12', '3.13']
1313

1414
steps:
1515
- name: Checkout

pyenzyme/fetcher/chebi.py

Lines changed: 66 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
"""
77

88
import re
9-
from typing import Dict, List, Optional
9+
from typing import List, Optional
1010

1111
import httpx
12-
from pydantic import BaseModel, ConfigDict, RootModel
12+
from pydantic import BaseModel, ConfigDict, Field
1313

1414
from pyenzyme.versions import v2
1515

@@ -24,73 +24,61 @@ def __init__(self, message: str, cause: Optional[Exception] = None):
2424
self.cause = cause
2525

2626

27-
class ChEBIStructure(BaseModel):
28-
"""Chemical structure information."""
29-
30-
model_config = ConfigDict(extra="ignore")
31-
32-
smiles: Optional[str] = None
33-
standard_inchi: Optional[str] = None
34-
standard_inchi_key: Optional[str] = None
35-
36-
37-
class ChEBIEntryData(BaseModel):
38-
"""Core data structure for a ChEBI entry."""
27+
class ChebiSearchSource(BaseModel):
28+
"""Source data structure from ChEBI search API result."""
3929

4030
model_config = ConfigDict(extra="ignore")
4131

32+
chebi_accession: str
33+
name: Optional[str] = None
4234
ascii_name: str
43-
default_structure: Optional[ChEBIStructure] = None
44-
45-
46-
class ChEBIEntryResult(BaseModel):
47-
"""Individual ChEBI entry result."""
48-
49-
model_config = ConfigDict(extra="ignore")
50-
51-
standardized_chebi_id: str
52-
data: ChEBIEntryData
53-
54-
55-
class ChEBIApiResponse(RootModel[Dict[str, ChEBIEntryResult]]):
56-
"""Top-level response structure from ChEBI API. Maps ChEBI IDs to their corresponding entry data."""
57-
58-
root: Dict[str, ChEBIEntryResult]
35+
smiles: Optional[str] = None
36+
inchi: Optional[str] = None
37+
inchikey: Optional[str] = None
38+
definition: Optional[str] = None
39+
formula: Optional[str] = None
40+
charge: Optional[int] = None
41+
mass: Optional[float] = None
42+
monoisotopicmass: Optional[float] = None
43+
stars: Optional[int] = None
44+
default_structure: Optional[int] = None
45+
structures: Optional[List[int]] = None
5946

6047

6148
class ChebiSearchResult(BaseModel):
6249
"""Individual search result structure."""
6350

64-
model_config = ConfigDict(extra="ignore")
51+
model_config = ConfigDict(extra="ignore", populate_by_name=True)
6552

66-
_source: Dict[str, str] # Contains chebi_accession field
53+
source: ChebiSearchSource = Field(alias="_source")
6754

6855

6956
class ChebiSearchResponse(BaseModel):
7057
"""Search response structure from ChEBI search API."""
7158

7259
results: List[ChebiSearchResult]
60+
total: int
61+
number_pages: int
7362

7463

7564
class ChEBIClient:
7665
"""Client for accessing the ChEBI API to fetch chemical entity data."""
7766

78-
BASE_URL = "https://www.ebi.ac.uk/chebi/backend/api/public/compounds/"
7967
SEARCH_URL = "https://www.ebi.ac.uk/chebi/backend/api/public/es_search/"
8068

8169
def __init__(self):
8270
"""Initialize the ChEBI client."""
8371
pass
8472

85-
def get_entry_by_id(self, chebi_id: str) -> ChEBIEntryResult:
73+
def get_entry_by_id(self, chebi_id: str) -> ChebiSearchSource:
8674
"""
87-
Fetch a ChEBI entry by its ID.
75+
Fetch a ChEBI entry by its ID using the search API.
8876
8977
Args:
9078
chebi_id: The ChEBI ID to fetch, can be with or without the 'CHEBI:' prefix
9179
9280
Returns:
93-
ChEBIEntryResult object with the parsed response data
81+
ChebiSearchSource object with the parsed response data
9482
9583
Raises:
9684
ChEBIError: If the ChEBI ID is invalid or not found
@@ -101,21 +89,18 @@ def get_entry_by_id(self, chebi_id: str) -> ChEBIEntryResult:
10189

10290
try:
10391
with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
104-
url = self.BASE_URL.format(chebi_id)
105-
response = client.get(url)
92+
params = {"term": chebi_id, "page": "1", "size": "1"}
93+
response = client.get(self.SEARCH_URL, params=params)
10694
response.raise_for_status()
10795

10896
if response.status_code == 200:
10997
try:
110-
raw_response_data = response.json()
98+
search_response = ChebiSearchResponse(**response.json())
11199

112-
if not raw_response_data or len(raw_response_data) == 0:
100+
if not search_response.results or len(search_response.results) == 0:
113101
raise ChEBIError(f"No data found for ChEBI ID {chebi_id}")
114102

115-
chebi_response = ChEBIApiResponse(raw_response_data)
116-
117-
entry = list(chebi_response.root.values())[0]
118-
return entry
103+
return search_response.results[0].source
119104

120105
except Exception as e:
121106
if isinstance(e, ChEBIError):
@@ -127,15 +112,15 @@ def get_entry_by_id(self, chebi_id: str) -> ChEBIEntryResult:
127112
except httpx.HTTPStatusError as e:
128113
raise ChEBIError(f"Failed to fetch ChEBI ID {chebi_id}: {str(e)}", e)
129114

130-
def get_entries_batch(self, chebi_ids: List[str]) -> List[ChEBIEntryResult]:
115+
def get_entries_batch(self, chebi_ids: List[str]) -> List[ChebiSearchSource]:
131116
"""
132-
Fetch multiple ChEBI entries by their IDs.
117+
Fetch multiple ChEBI entries by their IDs using the search API.
133118
134119
Args:
135120
chebi_ids: List of ChEBI IDs to fetch
136121
137122
Returns:
138-
List of ChEBIEntryResult objects with data from ChEBI
123+
List of ChebiSearchSource objects with data from ChEBI
139124
140125
Raises:
141126
ChEBIError: If any ChEBI ID is invalid or not found
@@ -144,74 +129,51 @@ def get_entries_batch(self, chebi_ids: List[str]) -> List[ChEBIEntryResult]:
144129
if not chebi_ids:
145130
return []
146131

147-
formatted_ids = []
132+
results = []
148133
for chebi_id in chebi_ids:
149-
if not chebi_id.startswith("CHEBI:"):
150-
formatted_ids.append(f"CHEBI:{chebi_id}")
151-
else:
152-
formatted_ids.append(chebi_id)
153-
154-
try:
155-
with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
156-
url = self.BASE_URL.format(chebi_id)
157-
response = client.get(url)
158-
response.raise_for_status()
159-
160-
if response.status_code == 200:
161-
try:
162-
raw_response_data = response.json()
163-
chebi_response = ChEBIApiResponse(raw_response_data)
164-
return list(chebi_response.root.values())
165-
166-
except Exception as e:
167-
raise ChEBIError(
168-
f"Failed to parse ChEBI batch response: {str(e)}", e
169-
)
170-
else:
171-
raise ChEBIError(f"HTTP {response.status_code}: {response.text}")
134+
try:
135+
entry = self.get_entry_by_id(chebi_id)
136+
results.append(entry)
137+
except ChEBIError as e:
138+
# Fail fast: abort the whole batch on the first ID that cannot be fetched, re-raising with the offending ID
139+
raise ChEBIError(f"Failed to fetch ChEBI ID {chebi_id}: {str(e)}", e)
172140

173-
except httpx.HTTPStatusError as e:
174-
raise ChEBIError(f"Failed to fetch ChEBI batch: {str(e)}", e)
141+
return results
175142

176143
def search_entries(
177-
self, query: str, size: Optional[int] = None
178-
) -> List[ChEBIEntryResult]:
144+
self, query: str, size: Optional[int] = None, page: int = 1
145+
) -> List[ChebiSearchSource]:
179146
"""
180147
Search for ChEBI entries by query string.
181148
182149
Args:
183150
query: The search query string to find ChEBI entries
184151
size: The maximum number of search results to return
152+
page: The page number to retrieve (default: 1)
185153
186154
Returns:
187-
List of ChEBIEntryResult objects for matching entries
155+
List of ChebiSearchSource objects for matching entries
188156
189157
Raises:
190158
ChEBIError: If the search request fails or the API is unavailable
191159
"""
192-
params = {"term": query}
160+
params = {"term": query, "page": str(page)}
193161
if size:
194162
params["size"] = str(size)
195163

196164
try:
197165
with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
198-
url = self.SEARCH_URL
199-
response = client.get(url, params=params)
166+
response = client.get(self.SEARCH_URL, params=params)
200167
response.raise_for_status()
201168

202169
if response.status_code == 200:
203170
try:
204-
search_results = ChebiSearchResponse(**response.json())
171+
search_response = ChebiSearchResponse(**response.json())
205172

206-
if not search_results.results:
173+
if not search_response.results:
207174
return []
208175

209-
chebi_ids = [
210-
result._source["chebi_accession"]
211-
for result in search_results.results
212-
]
213-
214-
return self.get_entries_batch(chebi_ids)
176+
return [result.source for result in search_response.results]
215177

216178
except Exception as e:
217179
if isinstance(e, ChEBIError):
@@ -226,44 +188,39 @@ def search_entries(
226188
raise ChEBIError(f"Failed to search ChEBI: {str(e)}", e)
227189

228190

229-
def process_chebi_entry(entry: ChEBIEntryResult) -> v2.SmallMolecule:
191+
def process_search_result(source: ChebiSearchSource) -> v2.SmallMolecule:
230192
"""
231-
Process a ChEBI entry result and convert it to a SmallMolecule object.
193+
Process a ChEBI search result source and convert it to a SmallMolecule object.
232194
233195
Args:
234-
entry: The ChEBI entry result from the API
196+
source: The ChEBI search result source from the API
235197
236198
Returns:
237199
A SmallMolecule object with mapped data
238200
"""
239-
smallmol_id = process_id(entry.data.ascii_name)
240-
241-
structure = entry.data.default_structure
242-
canonical_smiles = structure.smiles if structure else None
243-
inchi = structure.standard_inchi if structure else None
244-
inchikey = structure.standard_inchi_key if structure else None
201+
smallmol_id = process_id(source.ascii_name)
245202

246203
small_molecule = v2.SmallMolecule(
247204
id=smallmol_id,
248-
name=entry.data.ascii_name,
249-
canonical_smiles=canonical_smiles,
250-
inchi=inchi,
251-
inchikey=inchikey,
205+
name=source.ascii_name,
206+
canonical_smiles=source.smiles,
207+
inchi=source.inchi,
208+
inchikey=source.inchikey,
252209
constant=False,
253210
vessel_id=None,
254211
synonymous_names=[],
255212
references=[
256-
f"https://www.ebi.ac.uk/chebi/searchId.do?chebiId={entry.standardized_chebi_id}"
213+
f"https://www.ebi.ac.uk/chebi/searchId.do?chebiId={source.chebi_accession}"
257214
],
258215
)
259216

260217
small_molecule.add_type_term(
261-
term=f"OBO:{entry.standardized_chebi_id.replace(':', '_')}",
218+
term=f"OBO:{source.chebi_accession.replace(':', '_')}",
262219
prefix="OBO",
263220
iri="http://purl.obolibrary.org/obo/",
264221
)
265222

266-
small_molecule.ld_id = f"OBO:{entry.standardized_chebi_id.replace(':', '_')}"
223+
small_molecule.ld_id = f"OBO:{source.chebi_accession.replace(':', '_')}"
267224

268225
return small_molecule
269226

@@ -290,9 +247,9 @@ def fetch_chebi(
290247
"""
291248
try:
292249
client = ChEBIClient()
293-
chebi_entry = client.get_entry_by_id(chebi_id)
250+
chebi_source = client.get_entry_by_id(chebi_id)
294251

295-
small_molecule = process_chebi_entry(chebi_entry)
252+
small_molecule = process_search_result(chebi_source)
296253

297254
if smallmol_id is not None:
298255
small_molecule.id = smallmol_id
@@ -325,9 +282,9 @@ def fetch_chebi_batch(chebi_ids: List[str]) -> List[v2.SmallMolecule]:
325282
return []
326283

327284
client = ChEBIClient()
328-
chebi_entries = client.get_entries_batch(chebi_ids)
285+
chebi_sources = client.get_entries_batch(chebi_ids)
329286

330-
return [process_chebi_entry(entry) for entry in chebi_entries]
287+
return [process_search_result(source) for source in chebi_sources]
331288

332289

333290
def search_chebi(query: str, size: Optional[int] = None) -> List[v2.SmallMolecule]:
@@ -355,9 +312,9 @@ def search_chebi(query: str, size: Optional[int] = None) -> List[v2.SmallMolecul
355312
atp_results = search_chebi('ATP', 5)
356313
"""
357314
client = ChEBIClient()
358-
chebi_entries = client.search_entries(query, size)
315+
chebi_sources = client.search_entries(query, size)
359316

360-
return [process_chebi_entry(entry) for entry in chebi_entries]
317+
return [process_search_result(source) for source in chebi_sources]
361318

362319

363320
def process_id(name: str) -> str:

pyenzyme/fetcher/pubchem.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,6 @@ def from_cid(cid: int) -> PubChemQuery:
9898
response = client.get(url)
9999
response.raise_for_status()
100100

101-
if response.status_code != 200:
102-
raise ValueError(f"Failed to fetch PubChem data for CID {cid}")
103-
104101
return PubChemQuery(**response.json())
105102

106103
@staticmethod

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name = "pyenzyme"
33
version = "2.2.1"
44
description = "A Python library for EnzymeML"
55
authors = [{ name = "Jan Range", email = "range.jan@web.de" }]
6-
requires-python = ">=3.10,<4"
6+
requires-python = ">=3.11,<4"
77
readme = "README.md"
88
license = { text = "BSD-2-Clause" }
99
dependencies = [
@@ -17,7 +17,7 @@ dependencies = [
1717
"fastobo>=0.13.0,<0.14",
1818
"pymetadata>=0.5.3,<0.6",
1919
"httpx>=0.27",
20-
"mdmodels>=0.2.1,<0.3",
20+
"mdmodels>=0.2.4,<0.3",
2121
"joblib>=1.5.0,<2",
2222
"bokeh>=3.7.3,<4",
2323
"matplotlib>=3.10,<4",

tests/integration/test_fetcher.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import httpx
12
import pytest
23

34
from pyenzyme.fetcher.chebi import fetch_chebi
@@ -155,7 +156,7 @@ def test_fetch_pubchem_to_small_molecule_with_prefix(self):
155156

156157
@pytest.mark.remote
157158
def test_fetch_pubchem_to_small_molecule_invalid_id(self):
158-
with pytest.raises(ValueError):
159+
with pytest.raises(httpx.HTTPStatusError):
159160
fetch_pubchem(cid="162176127617627")
160161

161162
@pytest.mark.remote

0 commit comments

Comments
 (0)