Skip to content

Commit 5f6cdac

Browse files
authored
Merge pull request #81 from EnzymeML/measurement-docstrings
Update docstrings in `io.py`
2 parents 9431c6e + 2c420f9 commit 5f6cdac

File tree

2 files changed

+141
-41
lines changed

2 files changed

+141
-41
lines changed

pyenzyme/tabular.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,9 @@ def read_excel(
8484
time_unit: str,
8585
data_type: DataTypes = DataTypes.CONCENTRATION,
8686
):
87-
"""Reads a CSV file from the specified path into a measurement.
87+
"""Reads an Excel file from the specified path into a list of measurements.
8888
89-
This function expects the CSV file to have the following structure:
89+
This function expects the Excel file to have the following structure:
9090
9191
- time: The time points of the measurements. Should start at 0.
9292
- id: The ID of the measurement. Only needed in case of multiple measurements.
@@ -98,11 +98,10 @@ def read_excel(
9898
file, you need to have an 'id' column. Otherwise it will return a single measurement.
9999
100100
Args:
101-
path (str, pathlib.Path): The path to the CSV file.
101+
path (str, pathlib.Path): The path to the Excel file.
102102
data_unit (str): The unit of the data.
103103
time_unit (str): The unit of the time.
104104
data_type (DataTypes): The type of the data. Default is DataTypes.CONCENTRATION.
105-
sep (str): The separator of the CSV file. Default is ';'.
106105
107106
Returns:
108107
list[Measurement]: A list of measurements.
@@ -156,7 +155,7 @@ def read_csv(
156155
data_unit (str): The unit of the data.
157156
time_unit (str): The unit of the time.
158157
data_type (DataTypes): The type of the data. Default is DataTypes.CONCENTRATION.
159-
sep (str): The separator of the CSV file. Default is ';'.
158+
sep (str): The separator of the CSV file. Default is '\t'.
160159
161160
Returns:
162161
list[Measurement]: A list of measurements.

pyenzyme/versions/io.py

Lines changed: 137 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -86,15 +86,30 @@ def from_dataframe(
8686
data_unit: str,
8787
time_unit: str,
8888
) -> list[v2.Measurement]: # noqa: F405
89-
"""Create measurements from a pandas DataFrame.
89+
"""Parse a pandas DataFrame into a list of measurements.
90+
91+
This function expects the DataFrame to have the following structure:
92+
93+
- time: The time points of the measurements. Should start at 0.
94+
- id: The ID of the measurement. Only needed in case of multiple measurements.
95+
- [species_id]: Per column, the data of a species.
96+
97+
If there is no 'id' column, the function assumes that there is only one measurement
98+
in the DataFrame. If there is an 'id' column, the function assumes that there are multiple
99+
measurements in the DataFrame. Hence, if you want to have multiple measurements in the same
100+
DataFrame, you need to have an 'id' column. Otherwise, it will return a single measurement.
90101
91102
Args:
92-
df: DataFrame containing measurement data
93-
data_unit: Unit for the measurement data
94-
time_unit: Unit for the time data
103+
df (pd.DataFrame): The DataFrame to parse.
104+
data_unit (str): The unit of the data.
105+
time_unit (str): The unit of the time.
95106
96107
Returns:
97-
List of Measurement objects
108+
list[Measurement]: A list of Measurement objects.
109+
110+
Note:
111+
Unlike the file-based readers, this function takes an in-memory DataFrame and
112+
has no path argument, so the file-existence and path checks do not apply here.
98113
"""
99114
return from_dataframe(df, data_unit, time_unit)
100115

@@ -136,32 +151,72 @@ def to_sbml(
136151
cls,
137152
enzmldoc: v2.EnzymeMLDocument,
138153
path: Path | str | None = None,
154+
verbose: bool = False,
139155
) -> tuple[str, pd.DataFrame | None]: # noqa: F405
140-
"""Convert an EnzymeML document to SBML format and write to a file.
156+
"""Convert an EnzymeML document to SBML format and write it to a file.
157+
158+
The systems biology markup language (SBML) is a machine-readable format for
159+
representing models of biochemical reaction networks. This function converts
160+
an EnzymeML document to an SBML document. Prior to serialization the EnzymeML
161+
document is validated for SBML export.
162+
163+
Example:
164+
>>> import pyenzyme as pe
165+
>>> doc = pe.EnzymeMLDocument()
166+
>>> # ... add entities to doc ...
167+
>>> to_sbml(doc, "example.xml")
141168
142169
Args:
143-
enzmldoc: The EnzymeML document to convert
144-
path: Path to write the SBML document to
170+
enzmldoc (pe.EnzymeMLDocument): The EnzymeML document to convert.
171+
path (Path | str | None, optional): The output file to write the SBML document to. Defaults to None.
172+
verbose (bool, optional): Whether to print warnings during SBML validation. Defaults to False.
145173
146174
Returns:
147-
Tuple of the SBML document and the measurement data, or None if path is None
175+
tuple[str, pd.DataFrame | None]: The SBML document as a string, and a DataFrame with the measurement data (which may be None).
176+
177+
Raises:
178+
ValueError: If the EnzymeML document is not valid for SBML export.
148179
"""
149-
return to_sbml(enzmldoc, path)
180+
return to_sbml(enzmldoc, path, verbose)
150181

151182
@classmethod
152183
def to_petab(
153184
cls,
154185
enzmldoc: v2.EnzymeMLDocument,
155186
path: Path | str,
156187
) -> PEtab: # noqa: F405
157-
"""Convert an EnzymeML document to PEtab format and write to a file.
158-
159-
Args:
160-
enzmldoc: The EnzymeML document to convert
161-
path: Path to write the PEtab document to
162-
163-
Returns:
164-
The PEtab object
188+
"""
189+
Convert an EnzymeML document to a PEtab parameter estimation problem and write to file.
190+
191+
This function exports an EnzymeML document to the PEtab format, which is a
192+
standardized format for specifying parameter estimation problems in systems biology.
193+
The function creates all necessary PEtab files:
194+
195+
1. SBML model file: Contains the mathematical model specification
196+
2. Condition table: Specifies experimental conditions
197+
3. Observable table: Defines model outputs that correspond to measurements
198+
4. Measurement table: Contains experimental data points
199+
5. Parameter table: Defines model parameters and their estimation settings
200+
6. YAML configuration file: Links all files together in a PEtab problem definition
201+
202+
Args
203+
----
204+
enzmldoc : v2.EnzymeMLDocument
205+
The EnzymeML document to convert, containing all model information,
206+
measurements, and parameters.
207+
path : Union[Path, str]
208+
Directory path where PEtab files will be written. If the directory
209+
doesn't exist, it will be created.
210+
211+
Returns
212+
-------
213+
PEtab
214+
The PEtab problem object. Files are written to the specified path.
215+
216+
Notes
217+
-----
218+
The file naming convention is based on the EnzymeML document name,
219+
with spaces replaced by underscores and converted to lowercase.
165220
"""
166221
return to_petab(enzmldoc, path)
167222

@@ -170,13 +225,20 @@ def from_sbml(
170225
cls,
171226
path: Path | str,
172227
) -> v2.EnzymeMLDocument: # noqa: F405
173-
"""Read an EnzymeML document from an SBML file.
228+
"""
229+
Read an SBML file and initialize an EnzymeML document.
230+
231+
This function reads an SBML file from an OMEX archive, extracts all relevant
232+
information, and creates an EnzymeML document with the extracted data. It handles
233+
different versions of the EnzymeML format and maps SBML elements to their
234+
corresponding EnzymeML entities.
174235
175236
Args:
176-
path: Path to the SBML file
237+
path (Path | str): The path to the OMEX archive containing the SBML file.
177238
178239
Returns:
179-
An EnzymeMLDocument object
240+
An initialized EnzymeMLDocument object with extracted units, species, vessels,
241+
equations, parameters, reactions, and measurements.
180242
"""
181243
return read_sbml(v2.EnzymeMLDocument, path)
182244

@@ -188,11 +250,20 @@ def to_pandas(
188250
) -> pd.DataFrame | dict[str, pd.DataFrame]: # noqa: F405
189251
"""Convert an EnzymeML document to a pandas DataFrame.
190252
253+
The resulting DataFrame contains the following columns:
254+
255+
- time: The time values of the measurement.
256+
- id: The ID of the measurement.
257+
- [species]: The species data of the measurement per column.
258+
191259
Args:
192-
enzmldoc: The EnzymeML document to convert
260+
enzmldoc (EnzymeMLDocument): The EnzymeMLDocument object to convert.
261+
per_measurement (bool): If True, returns a dictionary of DataFrames keyed by measurement ID.
262+
If False, returns a single DataFrame containing all measurements.
193263
194264
Returns:
195-
DataFrame containing the measurement data, or None if no measurements exist
265+
pd.DataFrame or dictionary of DataFrames containing the measurement data,
266+
or None if no measurements exist
196267
"""
197268
df = to_pandas(enzmldoc)
198269

@@ -213,17 +284,32 @@ def from_csv(
213284
data_type: v2.DataTypes = v2.DataTypes.CONCENTRATION,
214285
sep: str = "\t",
215286
) -> list[v2.Measurement]: # noqa: F405
216-
"""Create measurements from a CSV file.
287+
"""Reads a CSV file from the specified path into a list of measurements.
288+
289+
This function expects the CSV file to have the following structure:
290+
291+
- time: The time points of the measurements. Should start at 0.
292+
- id: The ID of the measurement. Only needed in case of multiple measurements.
293+
- [species_id]: Per column, the data of a species.
294+
295+
If there is no 'id' column, the function assumes that there is only one measurement
296+
in the file. If there is an 'id' column, the function assumes that there are multiple
297+
measurements in the file. Hence, if you want to have multiple measurements in the same
298+
file, you need to have an 'id' column. Otherwise it will return a single measurement.
217299
218300
Args:
219-
path: Path to the CSV file
220-
data_unit: Unit for the measurement data
221-
time_unit: Unit for the time data
222-
data_type: Type of data (default: CONCENTRATION)
223-
sep: Separator used in the CSV file (default: tab)
301+
path (str, pathlib.Path): The path to the CSV file.
302+
data_unit (str): The unit of the data.
303+
time_unit (str): The unit of the time.
304+
data_type (DataTypes): The type of the data. Default is DataTypes.CONCENTRATION.
305+
sep (str): The separator of the CSV file. Default is '\t'.
224306
225307
Returns:
226-
List of Measurement objects
308+
list[Measurement]: A list of Measurement objects.
309+
310+
Raises:
311+
FileNotFoundError: If the file does not exist.
312+
ValueError: If the path is not a file.
227313
"""
228314
return read_csv(path, data_unit, time_unit, data_type, sep)
229315

@@ -235,16 +321,31 @@ def from_excel(
235321
time_unit: str,
236322
data_type: v2.DataTypes = v2.DataTypes.CONCENTRATION,
237323
) -> list[v2.Measurement]: # noqa: F405
238-
"""Create measurements from an Excel file.
324+
"""Reads an Excel file from the specified path into Measurement objects.
325+
326+
This function expects the Excel file to have the following structure:
327+
328+
- time: The time points of the measurements. Should start at 0.
329+
- id: The ID of the measurement. Only needed in case of multiple measurements.
330+
- [species_id]: Per column, the data of a species.
331+
332+
If there is no 'id' column, the function assumes that there is only one measurement
333+
in the file. If there is an 'id' column, the function assumes that there are multiple
334+
measurements in the file. Hence, if you want to have multiple measurements in the same
335+
file, you need to have an 'id' column. Otherwise it will return a single measurement.
239336
240337
Args:
241-
path: Path to the Excel file
242-
data_unit: Unit for the measurement data
243-
time_unit: Unit for the time data
244-
data_type: Type of data (default: CONCENTRATION)
338+
path (str, pathlib.Path): The path to the Excel file.
339+
data_unit (str): The unit of the data.
340+
time_unit (str): The unit of the time.
341+
data_type (DataTypes): The type of the data. Default is DataTypes.CONCENTRATION.
245342
246343
Returns:
247-
List of Measurement objects
344+
list[Measurement]: A list of measurements.
345+
346+
Raises:
347+
FileNotFoundError: If the file does not exist.
348+
ValueError: If the path is not a file.
248349
"""
249350
return read_excel(path, data_unit, time_unit, data_type)
250351

0 commit comments

Comments
 (0)