66"""
77
88import re
9- from typing import Dict , List , Optional
9+ from typing import List , Optional
1010
1111import httpx
12- from pydantic import BaseModel , ConfigDict , RootModel
12+ from pydantic import BaseModel , ConfigDict , Field
1313
1414from pyenzyme .versions import v2
1515
@@ -24,73 +24,61 @@ def __init__(self, message: str, cause: Optional[Exception] = None):
2424 self .cause = cause
2525
2626
27- class ChEBIStructure (BaseModel ):
28- """Chemical structure information."""
29-
30- model_config = ConfigDict (extra = "ignore" )
31-
32- smiles : Optional [str ] = None
33- standard_inchi : Optional [str ] = None
34- standard_inchi_key : Optional [str ] = None
35-
36-
37- class ChEBIEntryData (BaseModel ):
38- """Core data structure for a ChEBI entry."""
class ChebiSearchSource(BaseModel):
    """Payload of the ``_source`` object attached to one ChEBI search hit.

    Only ``chebi_accession`` and ``ascii_name`` are required; every other
    field falls back to ``None`` when the key is absent. Keys that are not
    declared here are dropped during validation (``extra="ignore"``).
    """

    model_config = ConfigDict(extra="ignore")

    # --- identification ---------------------------------------------------
    # Presumably of the form "CHEBI:<number>" (callers replace ':' with '_')
    # -- TODO confirm against the API.
    chebi_accession: str
    name: Optional[str] = None
    ascii_name: str

    # --- structure identifiers --------------------------------------------
    smiles: Optional[str] = None
    inchi: Optional[str] = None
    inchikey: Optional[str] = None

    # --- descriptive / physico-chemical data ------------------------------
    definition: Optional[str] = None
    formula: Optional[str] = None
    charge: Optional[int] = None
    mass: Optional[float] = None
    monoisotopicmass: Optional[float] = None
    stars: Optional[int] = None

    # --- structure references ---------------------------------------------
    # NOTE(review): typed as integers, so these look like structure IDs
    # rather than embedded structure objects -- confirm against the API.
    default_structure: Optional[int] = None
    structures: Optional[List[int]] = None
6047
class ChebiSearchResult(BaseModel):
    """One hit in the ``results`` list of a ChEBI search response.

    The JSON key ``_source`` is exposed as the attribute ``source``: pydantic
    treats names with a leading underscore as private attributes, so the
    field is declared under a public name and mapped through an alias.
    ``populate_by_name=True`` additionally allows constructing the model with
    ``source=...``. Unknown keys on the hit are ignored.
    """

    model_config = ConfigDict(populate_by_name=True, extra="ignore")

    source: ChebiSearchSource = Field(alias="_source")
6754
6855
class ChebiSearchResponse(BaseModel):
    """Top-level body returned by the ChEBI search endpoint.

    All three fields are required; validation fails if any is missing.
    """

    # Hits contained in this page of the response.
    results: List[ChebiSearchResult]
    # NOTE(review): presumably the overall hit count and the number of
    # available pages for the query -- confirm against the API.
    total: int
    number_pages: int
7362
7463
7564class ChEBIClient :
7665 """Client for accessing the ChEBI API to fetch chemical entity data."""
7766
78- BASE_URL = "https://www.ebi.ac.uk/chebi/backend/api/public/compounds/"
7967 SEARCH_URL = "https://www.ebi.ac.uk/chebi/backend/api/public/es_search/"
8068
8169 def __init__ (self ):
8270 """Initialize the ChEBI client."""
8371 pass
8472
85- def get_entry_by_id (self , chebi_id : str ) -> ChEBIEntryResult :
73+ def get_entry_by_id (self , chebi_id : str ) -> ChebiSearchSource :
8674 """
87- Fetch a ChEBI entry by its ID.
75+ Fetch a ChEBI entry by its ID using the search API .
8876
8977 Args:
9078 chebi_id: The ChEBI ID to fetch, can be with or without the 'CHEBI:' prefix
9179
9280 Returns:
93- ChEBIEntryResult object with the parsed response data
81+ ChebiSearchSource object with the parsed response data
9482
9583 Raises:
9684 ChEBIError: If the ChEBI ID is invalid or not found
@@ -101,21 +89,18 @@ def get_entry_by_id(self, chebi_id: str) -> ChEBIEntryResult:
10189
10290 try :
10391 with httpx .Client (timeout = DEFAULT_TIMEOUT ) as client :
104- url = self . BASE_URL . format ( chebi_id )
105- response = client .get (url )
92+ params = { "term" : chebi_id , "page" : "1" , "size" : "1" }
93+ response = client .get (self . SEARCH_URL , params = params )
10694 response .raise_for_status ()
10795
10896 if response .status_code == 200 :
10997 try :
110- raw_response_data = response .json ()
98+ search_response = ChebiSearchResponse ( ** response .json () )
11199
112- if not raw_response_data or len (raw_response_data ) == 0 :
100+ if not search_response . results or len (search_response . results ) == 0 :
113101 raise ChEBIError (f"No data found for ChEBI ID { chebi_id } " )
114102
115- chebi_response = ChEBIApiResponse (raw_response_data )
116-
117- entry = list (chebi_response .root .values ())[0 ]
118- return entry
103+ return search_response .results [0 ].source
119104
120105 except Exception as e :
121106 if isinstance (e , ChEBIError ):
@@ -127,15 +112,15 @@ def get_entry_by_id(self, chebi_id: str) -> ChEBIEntryResult:
127112 except httpx .HTTPStatusError as e :
128113 raise ChEBIError (f"Failed to fetch ChEBI ID { chebi_id } : { str (e )} " , e )
129114
130- def get_entries_batch (self , chebi_ids : List [str ]) -> List [ChEBIEntryResult ]:
115+ def get_entries_batch (self , chebi_ids : List [str ]) -> List [ChebiSearchSource ]:
131116 """
132- Fetch multiple ChEBI entries by their IDs.
117+ Fetch multiple ChEBI entries by their IDs using the search API .
133118
134119 Args:
135120 chebi_ids: List of ChEBI IDs to fetch
136121
137122 Returns:
138- List of ChEBIEntryResult objects with data from ChEBI
123+ List of ChebiSearchSource objects with data from ChEBI
139124
140125 Raises:
141126 ChEBIError: If any ChEBI ID is invalid or not found
@@ -144,74 +129,51 @@ def get_entries_batch(self, chebi_ids: List[str]) -> List[ChEBIEntryResult]:
144129 if not chebi_ids :
145130 return []
146131
147- formatted_ids = []
132+ results = []
148133 for chebi_id in chebi_ids :
149- if not chebi_id .startswith ("CHEBI:" ):
150- formatted_ids .append (f"CHEBI:{ chebi_id } " )
151- else :
152- formatted_ids .append (chebi_id )
153-
154- try :
155- with httpx .Client (timeout = DEFAULT_TIMEOUT ) as client :
156- url = self .BASE_URL .format (chebi_id )
157- response = client .get (url )
158- response .raise_for_status ()
159-
160- if response .status_code == 200 :
161- try :
162- raw_response_data = response .json ()
163- chebi_response = ChEBIApiResponse (raw_response_data )
164- return list (chebi_response .root .values ())
165-
166- except Exception as e :
167- raise ChEBIError (
168- f"Failed to parse ChEBI batch response: { str (e )} " , e
169- )
170- else :
171- raise ChEBIError (f"HTTP { response .status_code } : { response .text } " )
134+ try :
135+ entry = self .get_entry_by_id (chebi_id )
136+ results .append (entry )
137+ except ChEBIError as e :
138+ # Fail fast: abort the batch on the first failing ID and re-raise with that ID in the message
139+ raise ChEBIError (f"Failed to fetch ChEBI ID { chebi_id } : { str (e )} " , e )
172140
173- except httpx .HTTPStatusError as e :
174- raise ChEBIError (f"Failed to fetch ChEBI batch: { str (e )} " , e )
141+ return results
175142
176143 def search_entries (
177- self , query : str , size : Optional [int ] = None
178- ) -> List [ChEBIEntryResult ]:
144+ self , query : str , size : Optional [int ] = None , page : int = 1
145+ ) -> List [ChebiSearchSource ]:
179146 """
180147 Search for ChEBI entries by query string.
181148
182149 Args:
183150 query: The search query string to find ChEBI entries
184151 size: The maximum number of search results to return
152+ page: The page number to retrieve (default: 1)
185153
186154 Returns:
187- List of ChEBIEntryResult objects for matching entries
155+ List of ChebiSearchSource objects for matching entries
188156
189157 Raises:
190158 ChEBIError: If the search request fails or the API is unavailable
191159 """
192- params = {"term" : query }
160+ params = {"term" : query , "page" : str ( page ) }
193161 if size :
194162 params ["size" ] = str (size )
195163
196164 try :
197165 with httpx .Client (timeout = DEFAULT_TIMEOUT ) as client :
198- url = self .SEARCH_URL
199- response = client .get (url , params = params )
166+ response = client .get (self .SEARCH_URL , params = params )
200167 response .raise_for_status ()
201168
202169 if response .status_code == 200 :
203170 try :
204- search_results = ChebiSearchResponse (** response .json ())
171+ search_response = ChebiSearchResponse (** response .json ())
205172
206- if not search_results .results :
173+ if not search_response .results :
207174 return []
208175
209- chebi_ids = [
210- result ._source ["chebi_accession" ]
211- for result in search_results .results
212- ]
213-
214- return self .get_entries_batch (chebi_ids )
176+ return [result .source for result in search_response .results ]
215177
216178 except Exception as e :
217179 if isinstance (e , ChEBIError ):
@@ -226,44 +188,39 @@ def search_entries(
226188 raise ChEBIError (f"Failed to search ChEBI: { str (e )} " , e )
227189
228190
229- def process_chebi_entry ( entry : ChEBIEntryResult ) -> v2 .SmallMolecule :
def process_search_result(source: ChebiSearchSource) -> v2.SmallMolecule:
    """
    Build a SmallMolecule from the source payload of a ChEBI search hit.

    The molecule's id is derived from the ASCII name via ``process_id``, its
    name is the ASCII name itself, and the structure identifiers (SMILES,
    InChI, InChIKey) are copied over unchanged. The ChEBI accession is used
    for the reference URL and, with ':' replaced by '_', for the OBO type
    term and the linked-data id.

    Args:
        source: The ChEBI search result source from the API

    Returns:
        A SmallMolecule object with mapped data
    """
    molecule = v2.SmallMolecule(
        id=process_id(source.ascii_name),
        name=source.ascii_name,
        canonical_smiles=source.smiles,
        inchi=source.inchi,
        inchikey=source.inchikey,
        constant=False,
        vessel_id=None,
        synonymous_names=[],
        references=[
            f"https://www.ebi.ac.uk/chebi/searchId.do?chebiId={source.chebi_accession}"
        ],
    )

    # The same OBO-style term serves as both the type term and the ld_id.
    obo_term = f"OBO:{source.chebi_accession.replace(':', '_')}"

    molecule.add_type_term(
        term=obo_term,
        prefix="OBO",
        iri="http://purl.obolibrary.org/obo/",
    )
    molecule.ld_id = obo_term

    return molecule
269226
@@ -290,9 +247,9 @@ def fetch_chebi(
290247 """
291248 try :
292249 client = ChEBIClient ()
293- chebi_entry = client .get_entry_by_id (chebi_id )
250+ chebi_source = client .get_entry_by_id (chebi_id )
294251
295- small_molecule = process_chebi_entry ( chebi_entry )
252+ small_molecule = process_search_result ( chebi_source )
296253
297254 if smallmol_id is not None :
298255 small_molecule .id = smallmol_id
@@ -325,9 +282,9 @@ def fetch_chebi_batch(chebi_ids: List[str]) -> List[v2.SmallMolecule]:
325282 return []
326283
327284 client = ChEBIClient ()
328- chebi_entries = client .get_entries_batch (chebi_ids )
285+ chebi_sources = client .get_entries_batch (chebi_ids )
329286
330- return [process_chebi_entry ( entry ) for entry in chebi_entries ]
287+ return [process_search_result ( source ) for source in chebi_sources ]
331288
332289
333290def search_chebi (query : str , size : Optional [int ] = None ) -> List [v2 .SmallMolecule ]:
@@ -355,9 +312,9 @@ def search_chebi(query: str, size: Optional[int] = None) -> List[v2.SmallMolecul
355312 atp_results = search_chebi('ATP', 5)
356313 """
357314 client = ChEBIClient ()
358- chebi_entries = client .search_entries (query , size )
315+ chebi_sources = client .search_entries (query , size )
359316
360- return [process_chebi_entry ( entry ) for entry in chebi_entries ]
317+ return [process_search_result ( source ) for source in chebi_sources ]
361318
362319
363320def process_id (name : str ) -> str :
0 commit comments