diff --git a/q2_ms/plugin_setup.py b/q2_ms/plugin_setup.py index c3828f8..879bd57 100644 --- a/q2_ms/plugin_setup.py +++ b/q2_ms/plugin_setup.py @@ -64,7 +64,7 @@ citations=[], ) -plugin.methods.register_function( +plugin.pipelines.register_function( function=read_ms_experiment, inputs={"spectra": SampleData[mzML]}, outputs=[("xcms_experiment", XCMSExperiment)], diff --git a/q2_ms/xcms/read_ms_experiment.py b/q2_ms/xcms/read_ms_experiment.py index a18beab..f8106a7 100644 --- a/q2_ms/xcms/read_ms_experiment.py +++ b/q2_ms/xcms/read_ms_experiment.py @@ -5,28 +5,23 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -import copy import os import tempfile -from qiime2 import Metadata +import pandas as pd from q2_ms.types import XCMSExperimentDirFmt, mzMLDirFmt from q2_ms.utils import run_r_script def read_ms_experiment( - spectra: mzMLDirFmt, - sample_metadata: Metadata = None, -) -> XCMSExperimentDirFmt: - # Create parameters dict - params = copy.copy(locals()) - - # Init XCMSExperimentDirFmt - xcms_experiment = XCMSExperimentDirFmt() - - # Add output path to params - params["output_path"] = str(xcms_experiment) + ctx, + spectra, + sample_metadata=None, +): + xcms_experiment_dir_fmt = XCMSExperimentDirFmt() + spectra = spectra.view(mzMLDirFmt) + params = {"spectra": str(spectra), "output_path": str(xcms_experiment_dir_fmt)} with tempfile.TemporaryDirectory() as tmp_dir: if sample_metadata is not None: @@ -42,9 +37,37 @@ def read_ms_experiment( # Run R script run_r_script("read_ms_experiment", params, "XCMS") + # Create artifact with correct type + xcms_experiment = ctx.make_artifact( + _get_type(str(xcms_experiment_dir_fmt)), xcms_experiment_dir_fmt + ) + return xcms_experiment +def _get_type(directory: str) -> str: + """ + Determines the semantic type of an XCMSExperiment based on MS level data. + + Parameters: + directory (str): Path to the XCMSExperiment directory. + + Returns: + str: The semantic type, either 'XCMSExperiment' or + 'XCMSExperiment % Properties("MS2")'. + """ + df = pd.read_csv( + os.path.join(directory, "ms_backend_data.txt"), + sep="\t", + usecols=["msLevel"], + skiprows=1, + index_col=0, + ) + if (df["msLevel"] == 2).any(): + return 'XCMSExperiment % Properties("MS2")' + return "XCMSExperiment" + + def _validate_metadata(metadata, spectra_path): """ Validates that sample IDs in the metadata match the filenames in the spectra diff --git a/q2_ms/xcms/tests/data/get_type/ms1/ms_backend_data.txt b/q2_ms/xcms/tests/data/get_type/ms1/ms_backend_data.txt new file mode 100644 index 0000000..2914d54 --- /dev/null +++ b/q2_ms/xcms/tests/data/get_type/ms1/ms_backend_data.txt @@ -0,0 +1,4 @@ +# MsBackendMzR +"msLevel" "rtime" "acquisitionNum" +"1" 1 2501.378 1 +"2" 1 2502.943 2 diff --git a/q2_ms/xcms/tests/data/get_type/ms2/ms_backend_data.txt b/q2_ms/xcms/tests/data/get_type/ms2/ms_backend_data.txt new file mode 100644 index 0000000..1cc6e34 --- /dev/null +++ b/q2_ms/xcms/tests/data/get_type/ms2/ms_backend_data.txt @@ -0,0 +1,4 @@ +# MsBackendMzR +"msLevel" "rtime" "acquisitionNum" +"1" 1 2501.378 1 +"2" 2 2502.943 2 diff --git a/q2_ms/xcms/tests/data/ms2_spectra/ms2_simulated.mzML b/q2_ms/xcms/tests/data/ms2_spectra/ms2_simulated.mzML new file mode 100644 index 0000000..348b727 --- /dev/null +++ b/q2_ms/xcms/tests/data/ms2_spectra/ms2_simulated.mzML @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AAAAAAAAWUAAAAAAAMBiQAAAAAAAAGlA + + + + + + AAD6QwAAekQAAEhE + + + + + + + + + 3028 + + +4539 +0 + \ No newline at end of file diff --git a/q2_ms/xcms/tests/test_read_ms_experiment.py b/q2_ms/xcms/tests/test_read_ms_experiment.py index 94f296f..af87304 100644 --- a/q2_ms/xcms/tests/test_read_ms_experiment.py +++ b/q2_ms/xcms/tests/test_read_ms_experiment.py @@ -9,10 +9,12 @@ import pandas as pd import qiime2 +from qiime2 import Artifact from qiime2.plugin.testing import TestPluginBase +from qiime2.sdk import parse_type -from q2_ms.types import mzMLDirFmt -from q2_ms.xcms.read_ms_experiment import _validate_metadata, read_ms_experiment +from q2_ms.types import XCMSExperimentDirFmt, mzMLDirFmt +from q2_ms.xcms.read_ms_experiment import _get_type, _validate_metadata class TestReadMsExperiment(TestPluginBase): @@ -20,39 +22,41 @@ class TestReadMsExperiment(TestPluginBase): def setUp(self): super().setUp() - self.sample_metadata = pd.read_csv( - self.get_data_path("faahKO_sample_data/sample_metadata.tsv"), - sep="\t", - index_col=0, + self.sample_metadata = qiime2.Metadata.load( + self.get_data_path("faahKO_sample_data/sample_metadata.tsv") + ) + self.spectra_dir = mzMLDirFmt(self.get_data_path("faahKO"), mode="r") + self.spectra = Artifact.import_data("SampleData[mzML]", self.spectra_dir) + self.spectra_ms2_dir = mzMLDirFmt(self.get_data_path("ms2_spectra"), mode="r") + self.spectra_ms2 = Artifact.import_data( + "SampleData[mzML]", self.spectra_ms2_dir ) - self.spectra = mzMLDirFmt(self.get_data_path("faahKO"), mode="r") + self.read_ms_experiment = self.plugin.pipelines["read_ms_experiment"] def test_read_ms_experiment_metadata(self): - xcms_experiment = read_ms_experiment( - spectra=self.spectra, - sample_metadata=qiime2.Metadata(self.sample_metadata), + (xcms_experiment,) = self.read_ms_experiment( + spectra=self.spectra, sample_metadata=self.sample_metadata ) - sample_data_exp = pd.read_csv( - self.get_data_path( - "ms_experiment_sample_data/ms_experiment_sample_data_metadata.txt" - ), - sep="\t", - index_col=0, + self._test_read_ms_experiment_helper( + xcms_experiment, "ms_experiment_sample_data_metadata.txt" ) - sample_data_obs = pd.read_csv( - os.path.join(str(xcms_experiment), "ms_experiment_sample_data.txt"), - sep="\t", - index_col=0, + + def test_read_ms_experiment_no_metadata(self): + (xcms_experiment,) = self.read_ms_experiment( + spectra=self.spectra, + ) + self._test_read_ms_experiment_helper( + xcms_experiment, "ms_experiment_sample_data_default.txt" ) - sample_data_obs.drop(columns=["spectraOrigin"], inplace=True) - pd.testing.assert_frame_equal(sample_data_exp, sample_data_obs) + def _test_read_ms_experiment_helper(self, xcms_experiment, exp_sample_data): + self.assertEqual(xcms_experiment.type, parse_type("XCMSExperiment")) + + xcms_experiment = xcms_experiment.view(XCMSExperimentDirFmt) - def test_read_ms_experiment_without_metadata(self): - xcms_experiment = read_ms_experiment(spectra=self.spectra) sample_data_exp = pd.read_csv( self.get_data_path( - "ms_experiment_sample_data/ms_experiment_sample_data_default.txt" + os.path.join("ms_experiment_sample_data", exp_sample_data) ), sep="\t", index_col=0, @@ -66,13 +70,27 @@ def test_read_ms_experiment_without_metadata(self): pd.testing.assert_frame_equal(sample_data_exp, sample_data_obs) + def test_read_ms_experiment_ms2(self): + (xcms_experiment,) = self.read_ms_experiment(spectra=self.spectra_ms2) + self.assertEqual( + xcms_experiment.type, parse_type("XCMSExperiment % Properties('MS2')") + ) + def test_validate_metadata_missing(self): - metadata_missing = self.sample_metadata.drop(index="wt22") + metadata_missing = self.sample_metadata.to_dataframe().drop(index="wt22") with self.assertRaisesRegex(ValueError, "missing in sample-metadata: {'wt22'}"): - _validate_metadata(metadata_missing, str(self.spectra)) + _validate_metadata(metadata_missing, str(self.spectra_dir)) def test_read_ms_experiment_added(self): - metadata_added = self.sample_metadata.copy() + metadata_added = self.sample_metadata.to_dataframe().copy() metadata_added.loc["wt23"] = ["WT", "study"] with self.assertRaisesRegex(ValueError, "missing in spectra: {'wt23'}"): - _validate_metadata(metadata_added, str(self.spectra)) + _validate_metadata(metadata_added, str(self.spectra_dir)) + + def test_get_type_ms2(self): + type = _get_type(self.get_data_path("get_type/ms2")) + self.assertEqual(type, 'XCMSExperiment % Properties("MS2")') + + def test_get_type_ms1(self): + type = _get_type(self.get_data_path("get_type/ms1")) + self.assertEqual(type, "XCMSExperiment")