Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion q2_ms/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
citations=[],
)

plugin.methods.register_function(
plugin.pipelines.register_function(
function=read_ms_experiment,
inputs={"spectra": SampleData[mzML]},
outputs=[("xcms_experiment", XCMSExperiment)],
Expand Down
49 changes: 36 additions & 13 deletions q2_ms/xcms/read_ms_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,23 @@
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import copy
import os
import tempfile

from qiime2 import Metadata
import pandas as pd

from q2_ms.types import XCMSExperimentDirFmt, mzMLDirFmt
from q2_ms.utils import run_r_script


def read_ms_experiment(
spectra: mzMLDirFmt,
sample_metadata: Metadata = None,
) -> XCMSExperimentDirFmt:
# Create parameters dict
params = copy.copy(locals())

# Init XCMSExperimentDirFmt
xcms_experiment = XCMSExperimentDirFmt()

# Add output path to params
params["output_path"] = str(xcms_experiment)
ctx,
spectra,
sample_metadata=None,
):
xcms_experiment_dir_fmt = XCMSExperimentDirFmt()
spectra = spectra.view(mzMLDirFmt)
params = {"spectra": str(spectra), "output_path": str(xcms_experiment_dir_fmt)}

with tempfile.TemporaryDirectory() as tmp_dir:
if sample_metadata is not None:
Expand All @@ -42,9 +37,37 @@ def read_ms_experiment(
# Run R script
run_r_script("read_ms_experiment", params, "XCMS")

# Create artifact with correct type
xcms_experiment = ctx.make_artifact(
_get_type(str(xcms_experiment_dir_fmt)), xcms_experiment_dir_fmt
)

return xcms_experiment


def _get_type(directory: str) -> str:
"""
Determines the semantic type of an XCMSExperiment based on MS level data.

Parameters:
directory (str): Path to the XCMSExperiment directory.

Returns:
str: The semantic type, either 'XCMSExperiment' or
'XCMSExperiment % Properties("MS2")'.
"""
df = pd.read_csv(
os.path.join(directory, "ms_backend_data.txt"),
sep="\t",
usecols=["msLevel"],
skiprows=1,
index_col=0,
)
if (df["msLevel"] == 2).any():
return 'XCMSExperiment % Properties("MS2")'
return "XCMSExperiment"


def _validate_metadata(metadata, spectra_path):
"""
Validates that sample IDs in the metadata match the filenames in the spectra
Expand Down
4 changes: 4 additions & 0 deletions q2_ms/xcms/tests/data/get_type/ms1/ms_backend_data.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# MsBackendMzR
"msLevel" "rtime" "acquisitionNum"
"1" 1 2501.378 1
"2" 1 2502.943 2
4 changes: 4 additions & 0 deletions q2_ms/xcms/tests/data/get_type/ms2/ms_backend_data.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# MsBackendMzR
"msLevel" "rtime" "acquisitionNum"
"1" 1 2501.378 1
"2" 2 2502.943 2
82 changes: 82 additions & 0 deletions q2_ms/xcms/tests/data/ms2_spectra/ms2_simulated.mzML
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<indexedmzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0_idx.xsd">
<mzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd" accession="" version="1.1.0">
<cvList count="5">
<cv id="MS" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" URI="http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo"/>
<cv id="UO" fullName="Unit Ontology" URI="http://obo.cvs.sourceforge.net/obo/obo/ontology/phenotype/unit.obo"/>
<cv id="BTO" fullName="BrendaTissue545" version="unknown" URI="http://www.brenda-enzymes.info/ontology/tissue/tree/update/update_files/BrendaTissueOBO"/>
<cv id="GO" fullName="Gene Ontology - Slim Versions" version="unknown" URI="http://www.geneontology.org/GO_slims/goslim_goa.obo"/>
<cv id="PATO" fullName="Quality ontology" version="unknown" URI="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/quality.obo"/>
</cvList>
<fileDescription>
<fileContent>
<cvParam cvRef="MS" accession="MS:1000294" name="mass spectrum" />
</fileContent>
</fileDescription>
<sampleList count="1">
<sample id="sa_0" name="">
<cvParam cvRef="MS" accession="MS:1000004" name="sample mass" value="0" unitAccession="UO:0000021" unitName="gram" unitCvRef="UO" />
<cvParam cvRef="MS" accession="MS:1000005" name="sample volume" value="0" unitAccession="UO:0000098" unitName="milliliter" unitCvRef="UO" />
<cvParam cvRef="MS" accession="MS:1000006" name="sample concentration" value="0" unitAccession="UO:0000175" unitName="gram per liter" unitCvRef="UO" />
</sample>
</sampleList>
<softwareList count="2">
<software id="so_in_0" version="" >
<cvParam cvRef="MS" accession="MS:1000799" name="custom unreleased software tool" value="" />
</software>
<software id="so_default" version="" >
<cvParam cvRef="MS" accession="MS:1000799" name="custom unreleased software tool" value="" />
</software>
</softwareList>
<instrumentConfigurationList count="1">
<instrumentConfiguration id="ic_0">
<cvParam cvRef="MS" accession="MS:1000031" name="instrument model" />
<softwareRef ref="so_in_0" />
</instrumentConfiguration>
</instrumentConfigurationList>
<dataProcessingList count="1">
<dataProcessing id="dp_sp_0">
<processingMethod order="0" softwareRef="so_default">
<cvParam cvRef="MS" accession="MS:1000544" name="Conversion to mzML" />
<userParam name="warning" type="xsd:string" value="fictional processing method used to fulfill format requirements" />
</processingMethod>
</dataProcessing>
</dataProcessingList>
<run id="ru_0" defaultInstrumentConfigurationRef="ic_0" sampleRef="sa_0">
<spectrumList count="1" defaultDataProcessingRef="dp_sp_0">
<spectrum id="spectrum=0" index="0" defaultArrayLength="3" dataProcessingRef="dp_sp_0">
<cvParam cvRef="MS" accession="MS:1000525" name="spectrum representation" />
<cvParam cvRef="MS" accession="MS:1000511" name="ms level" value="2" />
<cvParam cvRef="MS" accession="MS:1000294" name="mass spectrum" />
<scanList count="1">
<cvParam cvRef="MS" accession="MS:1000795" name="no combination" />
<scan>
<cvParam cvRef="MS" accession="MS:1000016" name="scan start time" value="-1" unitAccession="UO:0000010" unitName="second" unitCvRef="UO" />
</scan>
</scanList>
<binaryDataArrayList count="2">
<binaryDataArray encodedLength="32">
<cvParam cvRef="MS" accession="MS:1000514" name="m/z array" unitAccession="MS:1000040" unitName="m/z" unitCvRef="MS" />
<cvParam cvRef="MS" accession="MS:1000523" name="64-bit float" />
<cvParam cvRef="MS" accession="MS:1000576" name="no compression" />
<binary>AAAAAAAAWUAAAAAAAMBiQAAAAAAAAGlA</binary>
</binaryDataArray>
<binaryDataArray encodedLength="16">
<cvParam cvRef="MS" accession="MS:1000515" name="intensity array" unitAccession="MS:1000131" unitName="number of detector counts" unitCvRef="MS"/>
<cvParam cvRef="MS" accession="MS:1000521" name="32-bit float" />
<cvParam cvRef="MS" accession="MS:1000576" name="no compression" />
<binary>AAD6QwAAekQAAEhE</binary>
</binaryDataArray>
</binaryDataArrayList>
</spectrum>
</spectrumList>
</run>
</mzML>
<indexList count="1">
<index name="spectrum">
<offset idRef="spectrum=0">3028</offset>
</index>
</indexList>
<indexListOffset>4539</indexListOffset>
<fileChecksum>0</fileChecksum>
</indexedmzML>
76 changes: 47 additions & 29 deletions q2_ms/xcms/tests/test_read_ms_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,50 +9,54 @@

import pandas as pd
import qiime2
from qiime2 import Artifact
from qiime2.plugin.testing import TestPluginBase
from qiime2.sdk import parse_type

from q2_ms.types import mzMLDirFmt
from q2_ms.xcms.read_ms_experiment import _validate_metadata, read_ms_experiment
from q2_ms.types import XCMSExperimentDirFmt, mzMLDirFmt
from q2_ms.xcms.read_ms_experiment import _get_type, _validate_metadata


class TestReadMsExperiment(TestPluginBase):
package = "q2_ms.xcms.tests"

def setUp(self):
super().setUp()
self.sample_metadata = pd.read_csv(
self.get_data_path("faahKO_sample_data/sample_metadata.tsv"),
sep="\t",
index_col=0,
self.sample_metadata = qiime2.Metadata.load(
self.get_data_path("faahKO_sample_data/sample_metadata.tsv")
)
self.spectra_dir = mzMLDirFmt(self.get_data_path("faahKO"), mode="r")
self.spectra = Artifact.import_data("SampleData[mzML]", self.spectra_dir)
self.spectra_ms2_dir = mzMLDirFmt(self.get_data_path("ms2_spectra"), mode="r")
self.spectra_ms2 = Artifact.import_data(
"SampleData[mzML]", self.spectra_ms2_dir
)
self.spectra = mzMLDirFmt(self.get_data_path("faahKO"), mode="r")
self.read_ms_experiment = self.plugin.pipelines["read_ms_experiment"]

def test_read_ms_experiment_metadata(self):
xcms_experiment = read_ms_experiment(
spectra=self.spectra,
sample_metadata=qiime2.Metadata(self.sample_metadata),
(xcms_experiment,) = self.read_ms_experiment(
spectra=self.spectra, sample_metadata=self.sample_metadata
)
sample_data_exp = pd.read_csv(
self.get_data_path(
"ms_experiment_sample_data/ms_experiment_sample_data_metadata.txt"
),
sep="\t",
index_col=0,
self._test_read_ms_experiment_helper(
xcms_experiment, "ms_experiment_sample_data_metadata.txt"
)
sample_data_obs = pd.read_csv(
os.path.join(str(xcms_experiment), "ms_experiment_sample_data.txt"),
sep="\t",
index_col=0,

def test_read_ms_experiment_no_metadata(self):
(xcms_experiment,) = self.read_ms_experiment(
spectra=self.spectra,
)
self._test_read_ms_experiment_helper(
xcms_experiment, "ms_experiment_sample_data_default.txt"
)
sample_data_obs.drop(columns=["spectraOrigin"], inplace=True)

pd.testing.assert_frame_equal(sample_data_exp, sample_data_obs)
def _test_read_ms_experiment_helper(self, xcms_experiment, exp_sample_data):
self.assertEqual(xcms_experiment.type, parse_type("XCMSExperiment"))

xcms_experiment = xcms_experiment.view(XCMSExperimentDirFmt)

def test_read_ms_experiment_without_metadata(self):
xcms_experiment = read_ms_experiment(spectra=self.spectra)
sample_data_exp = pd.read_csv(
self.get_data_path(
"ms_experiment_sample_data/ms_experiment_sample_data_default.txt"
os.path.join("ms_experiment_sample_data", exp_sample_data)
),
sep="\t",
index_col=0,
Expand All @@ -66,13 +70,27 @@ def test_read_ms_experiment_without_metadata(self):

pd.testing.assert_frame_equal(sample_data_exp, sample_data_obs)

def test_read_ms_experiment_ms2(self):
    """The pipeline output for the MS2 spectra artifact carries the MS2 property."""
    (result,) = self.read_ms_experiment(spectra=self.spectra_ms2)
    expected_type = parse_type("XCMSExperiment % Properties('MS2')")
    self.assertEqual(result.type, expected_type)

def test_validate_metadata_missing(self):
metadata_missing = self.sample_metadata.drop(index="wt22")
metadata_missing = self.sample_metadata.to_dataframe().drop(index="wt22")
with self.assertRaisesRegex(ValueError, "missing in sample-metadata: {'wt22'}"):
_validate_metadata(metadata_missing, str(self.spectra))
_validate_metadata(metadata_missing, str(self.spectra_dir))

def test_read_ms_experiment_added(self):
metadata_added = self.sample_metadata.copy()
metadata_added = self.sample_metadata.to_dataframe().copy()
metadata_added.loc["wt23"] = ["WT", "study"]
with self.assertRaisesRegex(ValueError, "missing in spectra: {'wt23'}"):
_validate_metadata(metadata_added, str(self.spectra))
_validate_metadata(metadata_added, str(self.spectra_dir))

def test_get_type_ms2(self):
    """_get_type reports the MS2 property for the "get_type/ms2" fixture."""
    # Named obs_type rather than `type` so the builtin is not shadowed.
    obs_type = _get_type(self.get_data_path("get_type/ms2"))
    self.assertEqual(obs_type, 'XCMSExperiment % Properties("MS2")')

def test_get_type_ms1(self):
    """_get_type reports the plain type for the "get_type/ms1" fixture."""
    # Named obs_type rather than `type` so the builtin is not shadowed.
    obs_type = _get_type(self.get_data_path("get_type/ms1"))
    self.assertEqual(obs_type, "XCMSExperiment")
Loading