Skip to content

Commit 4069b90

Browse files
committed
allow prefixing of identifiers
1 parent b61a438 commit 4069b90

File tree

5 files changed

+79
-5
lines changed

5 files changed

+79
-5
lines changed

pyenzyme/fetcher/pdb.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,11 @@ def fetch_pdb(
194194
"""
195195
# Get PDB data
196196
client = PDBClient()
197+
198+
# Allow prefixing with 'PDB:'
199+
if pdb_id.lower().startswith("pdb:"):
200+
pdb_id = pdb_id.split(":", 1)[-1]
201+
197202
pdb_response = client.get_entry_by_id(pdb_id)
198203

199204
if not pdb_response:

pyenzyme/fetcher/pubchem.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ def extract_by_preference(
145145

146146

147147
def fetch_pubchem(
148-
cid: int,
148+
cid: str,
149149
smallmol_id: Optional[str] = None,
150150
vessel_id: Optional[str] = None,
151151
) -> v2.SmallMolecule:
@@ -163,9 +163,13 @@ def fetch_pubchem(
163163
Raises:
164164
ValueError: If the PubChem API request fails or required data is missing
165165
"""
166-
query = PubChemClient.from_cid(cid)
166+
# Allow prefixing with 'pubchem:'
167+
if cid.lower().startswith("pubchem:"):
168+
cid = cid.split(":", 1)[-1]
169+
170+
query = PubChemClient.from_cid(int(cid))
167171
pc_compound = query.pc_compounds[0]
168-
name = _extract_name(pc_compound, cid)
172+
name = _extract_name(pc_compound, int(cid))
169173

170174
if not smallmol_id:
171175
smallmol_id = process_id(name)

pyenzyme/fetcher/uniprot.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,11 @@ def fetch_uniprot(
129129
ConnectionError: If the connection to the UniProt server fails
130130
"""
131131
client = UniProtClient()
132+
133+
# Allow prefixing with 'uniprot:'
134+
if uniprot_id.lower().startswith("uniprot:"):
135+
uniprot_id = uniprot_id.split(":", 1)[-1]
136+
132137
uniprot_entry = client.get_entry_by_id(uniprot_id)
133138

134139
if not uniprot_entry:

tests/integration/test_composer.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,25 @@ def test_compose(self):
2323
expected_doc = pe.read_enzymeml("tests/fixtures/compose/expected_compose.json")
2424
assert to_dict_wo_json_ld(doc) == to_dict_wo_json_ld(expected_doc)
2525

26+
def test_compose_with_prefix(self):
27+
# Act
28+
doc = pe.compose(
29+
name="test",
30+
vessel=pe.Vessel(
31+
id="vessel",
32+
name="vessel",
33+
volume=1.0,
34+
unit="ml", # type: ignore
35+
),
36+
proteins=["pdb:1A23"],
37+
small_molecules=["CHEBI:32551"],
38+
reactions=["RHEA:22864"],
39+
)
40+
41+
# Assert
42+
expected_doc = pe.read_enzymeml("tests/fixtures/compose/expected_compose.json")
43+
assert to_dict_wo_json_ld(doc) == to_dict_wo_json_ld(expected_doc)
44+
2645
def test_compose_no_vessel(self):
2746
# Act
2847
doc = pe.compose(

tests/integration/test_fetcher.py

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,22 @@ def test_fetch_uniprot_to_protein(self):
5757
assert len(protein.references) == 1
5858
assert protein.references[0] == "https://www.uniprot.org/uniprotkb/P07327"
5959

60+
def test_fetch_uniprot_to_protein_with_prefix(self):
61+
protein = fetch_uniprot("uniprot:P07327")
62+
assert protein is not None
63+
64+
assert protein.id == "alcohol_dehydrogenase_1a"
65+
assert protein.name == "Alcohol dehydrogenase 1A"
66+
assert protein.constant is True
67+
assert protein.ecnumber == "1.1.1.1"
68+
assert protein.organism == "Homo sapiens"
69+
assert protein.organism_tax_id == "9606"
70+
71+
assert protein.ld_id == "uniprot:P07327"
72+
assert "uniprot:P07327" in protein.ld_type
73+
assert len(protein.references) == 1
74+
assert protein.references[0] == "https://www.uniprot.org/uniprotkb/P07327"
75+
6076
def test_fetch_uniprot_to_protein_with_id(self):
6177
protein = fetch_uniprot("P07327", protein_id="p1")
6278
assert protein is not None
@@ -103,7 +119,19 @@ def test_fetch_rhea_to_reaction_invalid_id(self):
103119
fetch_rhea("INVALID_ID")
104120

105121
def test_fetch_pubchem_to_small_molecule(self):
106-
small_molecule = fetch_pubchem(cid=2244)
122+
small_molecule = fetch_pubchem(cid="2244")
123+
assert small_molecule is not None
124+
assert small_molecule.id == "2_acetyloxybenzoic_acid"
125+
assert small_molecule.name == "2-acetyloxybenzoic acid"
126+
assert small_molecule.canonical_smiles == "CC(=O)OC1=CC=CC=C1C(=O)O"
127+
assert small_molecule.inchikey == "BSYNRYMUTXBXSQ-UHFFFAOYSA-N"
128+
assert (
129+
small_molecule.inchi
130+
== "InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)"
131+
)
132+
133+
def test_fetch_pubchem_to_small_molecule_with_prefix(self):
134+
small_molecule = fetch_pubchem(cid="pubchem:2244")
107135
assert small_molecule is not None
108136
assert small_molecule.id == "2_acetyloxybenzoic_acid"
109137
assert small_molecule.name == "2-acetyloxybenzoic acid"
@@ -116,7 +144,7 @@ def test_fetch_pubchem_to_small_molecule(self):
116144

117145
def test_fetch_pubchem_to_small_molecule_invalid_id(self):
118146
with pytest.raises(ValueError):
119-
fetch_pubchem(cid=162176127617627)
147+
fetch_pubchem(cid="162176127617627")
120148

121149
def test_fetch_pdb_to_protein(self):
122150
protein = fetch_pdb("1a23")
@@ -131,6 +159,19 @@ def test_fetch_pdb_to_protein(self):
131159
== "AQYEDGKQYTTLEKPVAGAPQVLEFFSFFCPHCYQFEEVLHISDNVKKKLPEGVKMTKYHVNFMGGDLGKDLTQAWAVAMALGVEDKVTVPLFEGVQKTQTIRSASDIRDVFINAGIKGEEYDAAWNSFVVKSLVAQQEKAAADVQLRGVPAMFVNGKYQLNPQGMDTSNMDVFVQQYADTVKYLSEKK"
132160
)
133161

162+
def test_fetch_pdb_to_protein_with_prefix(self):
163+
protein = fetch_pdb("pdb:1a23")
164+
assert protein is not None
165+
assert protein.id == "1a23_1"
166+
assert (
167+
protein.name
168+
== "SOLUTION NMR STRUCTURE OF REDUCED DSBA FROM ESCHERICHIA COLI, MINIMIZED AVERAGE STRUCTURE"
169+
)
170+
assert (
171+
protein.sequence
172+
== "AQYEDGKQYTTLEKPVAGAPQVLEFFSFFCPHCYQFEEVLHISDNVKKKLPEGVKMTKYHVNFMGGDLGKDLTQAWAVAMALGVEDKVTVPLFEGVQKTQTIRSASDIRDVFINAGIKGEEYDAAWNSFVVKSLVAQQEKAAADVQLRGVPAMFVNGKYQLNPQGMDTSNMDVFVQQYADTVKYLSEKK"
173+
)
174+
134175
def test_fetch_pdb_to_protein_invalid_id(self):
135176
with pytest.raises(ValueError):
136177
fetch_pdb("INVALID_ID")

0 commit comments

Comments
 (0)