Skip to content

Commit d7ec01d

Browse files
authored
Merge pull request #84 from EnzymeML/retain-unmodeled-species
Retain unmodeled species
2 parents 5f6cdac + 4ab90b1 commit d7ec01d

File tree

4 files changed

+258
-5
lines changed

4 files changed

+258
-5
lines changed

pyenzyme/thinlayers/base.py

Lines changed: 91 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,23 +30,107 @@ class BaseThinLayer(ABC):
3030

3131
enzmldoc: v2.EnzymeMLDocument
3232
measurement_ids: List[str]
33+
exclude_unmodeled_species: bool = True
3334

3435
def __init__(
    self,
    enzmldoc: v2.EnzymeMLDocument,
    measurement_ids: Optional[List[str]] = None,
    df_per_measurement: bool = False,
    exclude_unmodeled_species: bool = True,
):
    """
    Initialize the thin layer from an EnzymeML document.

    The document passed in is never modified: all filtering happens on a
    private deep copy that is then stored on the instance.

    Args:
        enzmldoc (v2.EnzymeMLDocument): Source document. It is deep-copied
            before measurements without species data are dropped.
        measurement_ids (Optional[List[str]]): IDs of the measurements to
            use. When None, the IDs of all non-empty measurements are used.
        df_per_measurement (bool): Stored on the instance as
            ``df_per_measurement``.
        exclude_unmodeled_species (bool): Stored on the instance; when True,
            the ``df`` property filters the document through
            ``_remove_unmodeled_species`` before converting it.

    Raises:
        AssertionError: If ``enzmldoc`` is not an EnzymeMLDocument or
            ``measurement_ids`` is neither a list nor None.
    """
    assert isinstance(enzmldoc, v2.EnzymeMLDocument)
    assert isinstance(measurement_ids, list) or measurement_ids is None

    # Copy first: dropping empty measurements below must not leak back
    # into the document object owned by the caller.
    working_doc = enzmldoc.model_copy(deep=True)

    # Remove empty measurements
    working_doc.measurements = [
        meas for meas in working_doc.measurements if meas.species_data
    ]

    if measurement_ids is None:
        measurement_ids = [meas.id for meas in working_doc.measurements]

    self.enzmldoc = working_doc
    # Independent copy so that changes to the fitted document never alias
    # the (filtered) input document.
    self.fitted_doc = working_doc.model_copy(deep=True)
    self.measurement_ids = measurement_ids
    self.df_per_measurement = df_per_measurement
    self.exclude_unmodeled_species = exclude_unmodeled_species
58+
59+
@staticmethod
def _remove_unmodeled_species(enzmldoc: v2.EnzymeMLDocument) -> v2.EnzymeMLDocument:
    """
    Return a copy of the document reduced to its modeled species.

    A species counts as modeled when it is a reactant or product of any
    reaction, or when it is the target of an equation of type ODE. Species
    data, small molecules, proteins and complexes whose IDs are not in
    that set are dropped, as are measurements left without species data.

    Args:
        enzmldoc (v2.EnzymeMLDocument): The EnzymeML document to filter.

    Returns:
        v2.EnzymeMLDocument: A filtered deep copy; the argument itself is
            left untouched.

    Note:
        - If no species is modeled at all, the measurements, small
          molecules, proteins and complexes of the copy are all emptied.
    """
    pruned = enzmldoc.model_copy(deep=True)

    # IDs of every species that takes part in the model
    modeled_ids = set()

    # Reaction participants: reactants first, then products
    for reaction in pruned.reactions:
        for element in (*reaction.reactants, *reaction.products):
            modeled_ids.add(element.species_id)

    # Species described by an ODE
    for equation in pruned.equations:
        if equation.equation_type == v2.EquationType.ODE:
            modeled_ids.add(equation.species_id)

    # Nothing is modeled: strip every species-bearing collection
    if len(modeled_ids) == 0:
        pruned.measurements = []
        pruned.small_molecules = []
        pruned.proteins = []
        pruned.complexes = []
        return pruned

    kept_measurements = []
    for measurement in pruned.measurements:
        remaining = [
            entry
            for entry in measurement.species_data
            if entry.species_id in modeled_ids
        ]

        # Measurements without any modeled species are dropped entirely
        if not remaining:
            continue

        measurement.species_data = remaining
        kept_measurements.append(measurement)

    pruned.measurements = kept_measurements
    pruned.small_molecules = [
        molecule
        for molecule in pruned.small_molecules
        if molecule.id in modeled_ids
    ]
    pruned.proteins = [
        protein for protein in pruned.proteins if protein.id in modeled_ids
    ]
    pruned.complexes = [
        cplx for cplx in pruned.complexes if cplx.id in modeled_ids
    ]

    return pruned
50134

51135
@abstractmethod
52136
def integrate(
@@ -177,7 +261,12 @@ def df(self) -> pd.DataFrame:
177261
Raises:
178262
ValueError: If the conversion doesn't return a DataFrame.
179263
"""
180-
df = pe.to_pandas(self.enzmldoc, per_measurement=False)
264+
if self.exclude_unmodeled_species:
265+
enzmldoc = self._remove_unmodeled_species(self.enzmldoc)
266+
else:
267+
enzmldoc = self.enzmldoc
268+
269+
df = pe.to_pandas(enzmldoc, per_measurement=False)
181270

182271
# Drop all this rows where "id" is within measurement_ids
183272
df = (

pyenzyme/thinlayers/psyces.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,10 +6,10 @@
66

77
from __future__ import annotations
88

9-
from copy import deepcopy
109
from dataclasses import dataclass
1110
from pathlib import Path
1211
from joblib import Parallel, delayed
12+
import dill
1313
import numpy as np
1414
import pandas as pd
1515
import os
@@ -89,6 +89,7 @@ def __init__(
8989
enzmldoc=enzmldoc,
9090
measurement_ids=measurement_ids,
9191
df_per_measurement=False,
92+
exclude_unmodeled_species=True,
9293
)
9394

9495
if not isinstance(model_dir, Path):
@@ -309,9 +310,10 @@ def _get_experimental_data(self):
309310
310311
Populates the inits, experimental_data, and cols attributes.
311312
"""
313+
enzmldoc = self._remove_unmodeled_species(self.enzmldoc)
312314
self.inits = [
313315
InitMap.from_measurement(measurement, self.df_map[measurement.id])
314-
for measurement in self.enzmldoc.measurements
316+
for measurement in enzmldoc.measurements
315317
if measurement.id in self.measurement_ids
316318
]
317319

@@ -526,7 +528,7 @@ def to_pysces_model(self, model: pysces.model):
526528
Returns:
527529
pysces.model: The updated model with initial conditions set.
528530
"""
529-
model = deepcopy(model)
531+
model = dill.loads(dill.dumps(model))
530532
model.sim_time = np.array(self.time)
531533
model.__dict__.update(
532534
{

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ mdmodels = "^0.2.1"
2323
joblib = "^1.5.0"
2424
bokeh = "^3.7.3"
2525
matplotlib = "^3.10"
26+
dill = ">=0.3.9,<0.5.0"
2627

2728
[tool.poetry.group.psyces.dependencies]
2829
pysces = "^1.2.3"

tests/unit/test_thinlayer.py

Lines changed: 161 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,161 @@
1+
from pyenzyme.thinlayers.base import BaseThinLayer
2+
from pyenzyme.versions.v2 import EnzymeMLDocument, EquationType
3+
4+
# Keyword arguments shared by every species-data entry created in these tests;
# they are unpacked into ``add_to_species_data`` next to the species id.
MOCK_DATA = dict(
    initial=1.0,
    time=[1.0, 2.0, 3.0, 4.0],
    data=[1.0, 2.0, 3.0, 4.0],
)
10+
11+
12+
class TestThinLayer:
    """Unit tests for the species filtering provided by BaseThinLayer."""

    def test_remove_unmodeled_species_reaction(self):
        """
        Species that take part in no reaction must be filtered out.

        Expectations:
        - Small molecules absent from every reaction are dropped
        - Measurements holding only unmodeled species are dropped
        - Mixed measurements keep just their modeled species
        """
        enzmldoc = self._create_enzmldoc()

        # Only Substrate -> Product is modeled; "Unmodeled" is left out
        reaction = enzmldoc.add_to_reactions(id="R1", name="R1")
        reaction.add_to_reactants(species_id="Substrate", stoichiometry=1)
        reaction.add_to_products(species_id="Product", stoichiometry=1)

        tl_enzmldoc = MockThinLayer(enzmldoc).optimize()

        self._assert_only_modeled_species_remain(tl_enzmldoc)

    def test_remove_unmodeled_species_odes(self):
        """
        Species that are the target of no ODE must be filtered out.

        Expectations:
        - Small molecules without an ODE are dropped
        - Measurements holding only unmodeled species are dropped
        - Mixed measurements keep just their modeled species
        """
        enzmldoc = self._create_enzmldoc()

        # ODEs exist for Substrate and Product only; "Unmodeled" has none
        for species_id, rhs in (("Substrate", "-Substrate"), ("Product", "Substrate")):
            enzmldoc.add_to_equations(
                species_id=species_id,
                equation_type=EquationType.ODE,
                equation=rhs,
            )

        tl_enzmldoc = MockThinLayer(enzmldoc).optimize()

        self._assert_only_modeled_species_remain(tl_enzmldoc)

    def _assert_only_modeled_species_remain(self, tl_enzmldoc):
        """
        Check the filtered fixture document shared by both tests.

        Two small molecules and two measurements must remain, and no
        remaining measurement may reference the "Unmodeled" species.
        """
        molecule_count = len(tl_enzmldoc.small_molecules)
        assert molecule_count == 2, (
            f"Unmodeled small molecules should be removed, but {molecule_count} remain."
        )

        measurement_count = len(tl_enzmldoc.measurements)
        assert measurement_count == 2, (
            f"Unmodeled measurements should be removed, but {measurement_count} remain."
        )

        # One entry per offending species-data record
        measurement_has_unmodeled: list[str] = [
            measurement.id
            for measurement in tl_enzmldoc.measurements
            for species_data in measurement.species_data
            if species_data.species_id == "Unmodeled"
        ]

        assert not measurement_has_unmodeled, (
            f"Unmodeled species should be removed, but appears in measurements {measurement_has_unmodeled}."
        )

    def _create_enzmldoc(self) -> EnzymeMLDocument:
        """
        Build the fixture document used by the tests.

        Contents:
        - Small molecules: Substrate, Product, Unmodeled
        - M1: data for all three species (mixed modeled/unmodeled)
        - M2: data for Substrate and Product only
        - M3: data for Unmodeled only
        - M4: no species data at all

        Returns:
            EnzymeMLDocument: The populated test document.
        """
        enzmldoc = EnzymeMLDocument(name="Test")

        # Register the species and keep the created objects for their ids
        species = {
            name: enzmldoc.add_to_small_molecules(id=name, name=name)
            for name in ("Substrate", "Product", "Unmodeled")
        }

        # Measurement id -> species recorded in it (insertion order matters)
        layout = {
            "M1": ("Substrate", "Product", "Unmodeled"),
            "M2": ("Substrate", "Product"),
            "M3": ("Unmodeled",),
            "M4": (),
        }

        for measurement_id, species_names in layout.items():
            measurement = enzmldoc.add_to_measurements(
                id=measurement_id, name=measurement_id
            )
            for name in species_names:
                measurement.add_to_species_data(
                    species_id=species[name].id, **MOCK_DATA
                )

        return enzmldoc
140+
141+
142+
class MockThinLayer(BaseThinLayer):
    """
    Smallest concrete BaseThinLayer usable in tests.

    ``integrate`` and ``write`` are no-ops; ``optimize`` exposes the base
    class' species filtering so the tests can inspect its result.
    """

    def integrate(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None

    def optimize(self, *args, **kwargs):
        """Return the stored document filtered by ``_remove_unmodeled_species``."""
        filtered_doc = self._remove_unmodeled_species(self.enzmldoc)
        return filtered_doc

    def write(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None

0 commit comments

Comments
 (0)