Skip to content

Commit da2736d

Browse files
committed
Update fix_compartments in curate.py #121
1 parent 81b2d1b commit da2736d

File tree

2 files changed

+52
-7
lines changed

2 files changed

+52
-7
lines changed

src/refinegems/curation/curate.py

Lines changed: 35 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -469,6 +469,16 @@ def add_compartment_structure_specs(model: libModel) -> None:
469469
"""
470470
for compartment in model.getListOfCompartments():
471471

472+
if not compartment.isSetMetaId():
473+
compartment.setMetaId(f"meta_{compartment.getId()}")
474+
475+
# A physical compartment is the most likely case
476+
if not compartment.isSetSBOTerm():
477+
if (compartment.getId() == 'uc') or 'unknown' in compartment.getName().lower():
478+
compartment.setSBOTerm("SBO:0000410") # implicit compartment
479+
else:
480+
compartment.setSBOTerm('SBO:0000290') # physical compartment
481+
472482
if not compartment.isSetSize():
473483
compartment.setSize(float("NaN"))
474484

@@ -504,16 +514,36 @@ def fix_compartments(model: libModel) -> libModel:
504514
# If any metabolite has no compartment
505515
if comps_missing:
506516
# Get compartment list (for consistency)
507-
comps_in_model = model.getListOfCompartments()
517+
comps_in_model = set([c.getId() for c in model.getListOfCompartments()])
518+
metab_comps = set()
508519
for m in model.getListOfSpecies():
509-
comp_from_id = m.getId().split('_')[-1]
510-
if (comp_from_id in comps_in_model) or (comp_from_id in VALID_COMPARTMENTS):
511-
m.setCompartment(comp_from_id)
520+
comp_from_id = m.getId().split('_')[-1].strip() # In case of whitespace
521+
if (comp_from_id in comps_in_model) or (comp_from_id in VALID_COMPARTMENTS.keys()):
522+
m.setCompartment(comp_from_id) # Set compartment from id
523+
metab_comps.add(comp_from_id)
524+
512525
else:
513526
# No compartment in id found, using unknown
514527
default_comp = 'uc'
515-
logging.WARNING(f'Compartment for metabolite {m.getId()} not found, setting to {default_comp}:{VALID_COMPARTMENTS["uc"]}')
528+
logging.warning(f'Compartment for metabolite {m.getId()} not found, setting to {default_comp}:{VALID_COMPARTMENTS["uc"]}')
516529
m.setCompartment(default_comp)
530+
metab_comps.add(default_comp)
531+
532+
# Check if any compartment assigned to a metabolite is missing in the compartment list
533+
missing_comps = metab_comps - comps_in_model # Comps missing in model
534+
if missing_comps: # If any comps missing add to model
535+
for c in missing_comps:
536+
# Create new compartment based on the id found in the metabolite id
537+
new_comp = model.createCompartment()
538+
new_comp.setId(c)
539+
new_comp.setName(VALID_COMPARTMENTS[c])
540+
new_comp.setMetaId(f'meta_{c}')
541+
542+
comps_to_remove = comps_in_model - metab_comps # Comps in model that are not used by any metabolite
543+
if comps_to_remove: # If any comps to remove
544+
for c in comps_to_remove:
545+
logging.warning(f'Removing compartment {c} as no metabolite is assigned to it.')
546+
model.removeCompartment(c)
517547

518548
# Check validity of compartment IDs & adjust if necessary
519549
resolve_compartment_names(model)

src/refinegems/utility/entities.py

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,13 @@
1515
import pandas as pd
1616
import re
1717
import requests
18+
import subprocess
19+
import tempfile
1820
import urllib
1921
import warnings
2022

23+
from .io import load_model, write_model_to_file
24+
2125
from Bio import Entrez
2226
from Bio.KEGG import REST, Compound
2327
from cobra.io.sbml import _f_gene
@@ -155,22 +159,33 @@ def resolve_compartment_names(model: Union[cobra.Model, libModel]) -> None:
155159
# only compartments IN the model will be added
156160
model.compartments = VALID_COMPARTMENTS
157161

162+
# @TEST: Needs further testing
158163
case libModel():
159164
# for each metabolite rename the compartment
160165
for metabolite in model.getListOfSpecies():
161166
metabolite.setCompartment(COMP_MAPPING[metabolite.getCompartment()])
162-
167+
168+
comp_map = {}
163169
# for each compartment rename the compartment ID
164170
for comp in model.getListOfCompartments():
165-
new_id = COMP_MAPPING[comp.getId()]
171+
current_id = comp.getId()
172+
new_id = COMP_MAPPING[current_id]
166173
comp.setId(new_id)
174+
# comp_map[current_id] = new_id
167175

168176
# add whole descriptions of the compartments to the model
169177
# note:
170178
# only compartments IN the model will be added
171179
if comp.getId() in VALID_COMPARTMENTS:
172180
comp.setName(VALID_COMPARTMENTS[comp.getId()])
173181

182+
# # fix ID references in the whole file
183+
# with tempfile.NamedTemporaryFile(suffix=".xml") as tmp:
184+
# write_model_to_file(model, tmp.name)
185+
# for current_id, new_id in tqdm(comp_map.items()):
186+
# subprocess.run(['sed', '-i', "''",f's/{current_id}/{new_id}/g', tmp.name])
187+
# model = load_model(tmp.name, "libsbml")
188+
174189
case _:
175190
raise TypeError(f"Unknown model object type: {type(model)}. Must be one of (cobra.Model, libsbml.Model).")
176191

0 commit comments

Comments
 (0)