Skip to content
55 changes: 55 additions & 0 deletions q2_ms/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
# ----------------------------------------------------------------------------
import importlib

from q2_types.metadata import ImmutableMetadata
from q2_types.sample_data import SampleData
from qiime2.core.type import Choices, Properties, Str, TypeMap
from qiime2.plugin import Citations, Metadata, Plugin

from q2_ms import __version__
Expand Down Expand Up @@ -38,6 +40,7 @@
mzMLFormat,
)
from q2_ms.xcms.database import fetch_massbank
from q2_ms.xcms.metadata import create_spectral_metadata
from q2_ms.xcms.read_ms_experiment import read_ms_experiment

citations = Citations.load("citations.bib", package="q2_ms")
Expand Down Expand Up @@ -67,6 +70,58 @@
citations=[],
)

# Type map pairing each allowed value of the ``ms_level`` parameter with the
# XCMSExperiment variants it may be combined with. Every listed combination
# maps to ImmutableMetadata. "1" is listed for all six variants below, while
# "2" is only listed for variants that carry the "MS2" property.
# The third value returned by TypeMap is not used and is discarded as ``_``.
# NOTE(review): presumably this is what makes the framework reject
# ms_level="2" for experiments without MS2 data - confirm against the
# QIIME 2 TypeMap documentation.
P_ms_level, I_xcms_experiment, _ = TypeMap(
{
# ms_level "1": plain experiment and every property combination.
(Str % Choices(["1"]), XCMSExperiment): ImmutableMetadata,
(Str % Choices(["1"]), XCMSExperiment % Properties("peaks")): ImmutableMetadata,
(
Str % Choices(["1"]),
XCMSExperiment % Properties("features"),
): ImmutableMetadata,
(Str % Choices(["1"]), XCMSExperiment % Properties("MS2")): ImmutableMetadata,
(
Str % Choices(["1"]),
XCMSExperiment % Properties("MS2", "peaks"),
): ImmutableMetadata,
(
Str % Choices(["1"]),
XCMSExperiment % Properties("MS2", "features"),
): ImmutableMetadata,
# ms_level "2": only experiments that have the "MS2" property.
(Str % Choices(["2"]), XCMSExperiment % Properties("MS2")): ImmutableMetadata,
(
Str % Choices(["2"]),
XCMSExperiment % Properties("MS2", "peaks"),
): ImmutableMetadata,
(
Str % Choices(["2"]),
XCMSExperiment % Properties("MS2", "features"),
): ImmutableMetadata,
}
)


# Register ``create_spectral_metadata`` as a plugin method. The input and the
# ``ms_level`` parameter use the type-map variables so that only the listed
# (ms_level, XCMSExperiment variant) combinations are accepted.
# The user-facing texts describe the selected MS level rather than MS1 only,
# because ``ms_level`` can be "1" or "2".
plugin.methods.register_function(
    function=create_spectral_metadata,
    inputs={"xcms_experiment": I_xcms_experiment},
    outputs=[("spectral_metadata", ImmutableMetadata)],
    parameters={"ms_level": P_ms_level},
    input_descriptions={"xcms_experiment": "XCMSExperiment."},
    output_descriptions={
        "spectral_metadata": (
            "Spectral metadata of all scans of the selected MS level."
        )
    },
    parameter_descriptions={
        "ms_level": "If the spectral metadata should be created for MS1 or MS2 scans."
    },
    name="Create spectral metadata",
    description=(
        "This action creates a spectral metadata table from a XCMSExperiment artifact. "
        "This metadata can be used to plot total ion chromatograms or base peak "
        "chromatograms and other line and box plots with q2-vizard.\n\nNOTE:\nThe data "
        "gets filtered by MS level and only scans of the selected MS level are "
        "retained. Also the name of the column defining the sample id in the sample "
        "data will get '_' added as a suffix."
    ),
    citations=[],
)

plugin.pipelines.register_function(
function=read_ms_experiment,
inputs={"spectra": SampleData[mzML]},
Expand Down
88 changes: 88 additions & 0 deletions q2_ms/xcms/metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2025, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import os

import pandas as pd
import qiime2
from qiime2.metadata.base import is_id_header

from q2_ms.types import XCMSExperimentDirFmt


def create_spectral_metadata(
    xcms_experiment: XCMSExperimentDirFmt, ms_level: str = "1"
) -> qiime2.Metadata:
    """Build a per-spectrum metadata table from an XCMSExperiment directory.

    Three tab-separated files are read from the directory:
    ``ms_backend_data.txt`` (one row per spectrum),
    ``ms_experiment_sample_data.txt`` (one row per sample) and
    ``ms_experiment_sample_data_links_spectra.txt`` (sample index to spectra
    index). They are joined into a single table that is then filtered to the
    requested MS level.

    Parameters
    ----------
    xcms_experiment
        Directory format whose ``str()`` is the directory containing the
        three files listed above.
    ms_level
        MS level to keep, given as a string that ``int()`` can parse
        (the plugin registers "1" and "2").

    Returns
    -------
    qiime2.Metadata
        Metadata indexed by the spectrum index (as strings, index name
        "id"). If ``rtime_adjusted`` is present, a ``rtime_adjusted-rtime``
        column is added. Column names that QIIME 2 reserves as ID headers get
        "_" appended. For ``ms_level == "1"`` the precursor and
        isolation-window columns, which only describe MS2 scans, are removed.
    """
    experiment_dir = str(xcms_experiment)

    # Read the backend data file while skipping the first line
    backend_df = pd.read_csv(
        os.path.join(experiment_dir, "ms_backend_data.txt"),
        sep="\t",
        skiprows=1,
        index_col=0,
    )

    # Read the sample data file
    sample_df = pd.read_csv(
        os.path.join(experiment_dir, "ms_experiment_sample_data.txt"), sep="\t"
    )
    sample_df.drop(columns=["spectraOrigin"], inplace=True)

    # Read the links file that maps sample indices to spectra indices.
    links_df = pd.read_csv(
        os.path.join(experiment_dir, "ms_experiment_sample_data_links_spectra.txt"),
        sep="\t",
        header=None,
        names=["sample_id", "spectra_index"],
    )

    # Merge the backend data with the links data using the spectra_index.
    merged_df = backend_df.join(links_df.set_index("spectra_index"), how="left")

    # Merge the merged_df with the sample_df using the sample_id.
    merged_df = merged_df.join(sample_df, on="sample_id")

    # Create new column that shows the difference between rtime and the adjusted rtime
    if "rtime_adjusted" in merged_df.columns:
        merged_df["rtime_adjusted-rtime"] = (
            merged_df["rtime_adjusted"] - merged_df["rtime"]
        )

    # Set index to str
    merged_df.index.name = "id"
    merged_df.index = merged_df.index.astype(str)

    # Set column centroided to str
    if "centroided" in merged_df.columns:
        merged_df["centroided"] = merged_df["centroided"].astype(str)

    # Adds "_" to column name if it is a reserved metadata index name
    merged_df.columns = [
        col + "_" if is_id_header(col) else col for col in merged_df.columns
    ]

    # Filter data by MS level
    merged_df = merged_df.loc[merged_df["msLevel"] == int(ms_level)]

    if ms_level == "1":
        # Drop columns that only contain information about MS2 scans.
        # DataFrame.drop returns a new frame, so the result must be assigned;
        # errors="ignore" skips any of these columns that are not present.
        columns_to_drop = [
            "precScanNum",
            "precursorMz",
            "precursorIntensity",
            "precursorCharge",
            "collisionEnergy",
            "isolationWindowLowerMz",
            "isolationWindowTargetMz",
            "isolationWindowUpperMz",
        ]
        merged_df = merged_df.drop(columns=columns_to_drop, errors="ignore")

    return qiime2.Metadata(merged_df)
6 changes: 6 additions & 0 deletions q2_ms/xcms/tests/data/metadata/ms_backend_data.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# MsBackendMzR
"msLevel" "rtime" "acquisitionNum" "dataOrigin" "centroided" "polarity" "precScanNum" "precursorMz" "precursorIntensity" "precursorCharge" "collisionEnergy" "isolationWindowLowerMz" "isolationWindowTargetMz" "isolationWindowUpperMz" "peaksCount" "totIonCurrent" "basePeakMZ" "basePeakIntensity" "ionisationEnergy" "lowMZ" "highMZ" "injectionTime" "filterString" "spectrumId" "scanWindowLowerLimit" "scanWindowUpperLimit" "rtime_adjusted" "dataStorage" "scanIndex"
"1" 1 0.106081408 1 "path/sample_1.mzML" TRUE 0 NA NA NA NA NA NA NA NA 909 19614582 197.807357788086 2139308 0 61.9880981445313 755.029235839844 50 "FTMS - c ESI Full ms [60.0000-900.0000]" "controllerType=0 controllerNumber=1 scan=1" 60 900 0.106081408 "path/sample_1.mzML" 1
"2" 1 0.536452816 2 "path/sample_2.mzML" TRUE 0 NA NA NA NA NA NA NA NA 968 20253388 197.807403564453 2154304.5 0 61.9871025085449 665.043334960938 50 "FTMS - c ESI Full ms [60.0000-900.0000]" "controllerType=0 controllerNumber=1 scan=2" 60 900 0.536452816 "path/sample_2.mzML" 2
"3" 2 0.613772143999998 3 "path/sample_3.mzML" TRUE 0 1 203.081527709961 811281.937988281 0 30 202.481527709961 203.081527709961 203.681527709961 8 73490.75 155.461517333984 9954.2333984375 0 66.4943084716797 155.461517333984 22 "FTMS - c ESI d Full ms2 203.0815@hcd30.00 [52.0000-214.0000]" "controllerType=0 controllerNumber=1 scan=3" 52 214 0.613772143999998 "path/sample_3.mzML" 3
"4" 2 0.719534496 4 "path/sample_4.mzML" TRUE 0 1 219.84489440918 152150.111816406 1 30 219.24489440918 219.84489440918 220.44489440918 4 74733 219.844863891602 46147.5546875 0 90.0349273681641 219.844863891602 22 "FTMS - c ESI d Full ms2 219.8449@hcd30.00 [53.0000-230.0000]" "controllerType=0 controllerNumber=1 scan=4" 53 230 0.719534496 "path/sample_4.mzML" 4
5 changes: 5 additions & 0 deletions q2_ms/xcms/tests/data/metadata/ms_experiment_sample_data.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"sample_name" "sample_group" "sample_type" "spectraOrigin"
"1" "sample_1" "KO" "QC" "path/sample_1.mzML"
"2" "sample_2" "KO" "study" "path/sample_2.mzML"
"3" "sample_3" "WT" "QC" "path/sample_3.mzML"
"4" "sample_4" "WT" "study" "path/sample_4.mzML"
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
1 1
2 2
3 3
4 4
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id msLevel rtime acquisitionNum dataOrigin centroided polarity precScanNum precursorMz precursorIntensity precursorCharge collisionEnergy isolationWindowLowerMz isolationWindowTargetMz isolationWindowUpperMz peaksCount totIonCurrent basePeakMZ basePeakIntensity ionisationEnergy lowMZ highMZ injectionTime filterString spectrumId scanWindowLowerLimit scanWindowUpperLimit rtime_adjusted dataStorage scanIndex sample_id sample_name_ sample_group sample_type rtime_adjusted-rtime
1 1 0.106081408 1 path/sample_1.mzML True 0 909 19614582.0 197.807357788086 2139308.0 0 61.9880981445313 755.029235839844 50 FTMS - c ESI Full ms [60.0000-900.0000] controllerType=0 controllerNumber=1 scan=1 60 900 0.106081408 path/sample_1.mzML 1 1 sample_1 KO QC 0.0
2 1 0.536452816 2 path/sample_2.mzML True 0 968 20253388.0 197.807403564453 2154304.5 0 61.9871025085449 665.043334960938 50 FTMS - c ESI Full ms [60.0000-900.0000] controllerType=0 controllerNumber=1 scan=2 60 900 0.536452816 path/sample_2.mzML 2 2 sample_2 KO study 0.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id msLevel rtime acquisitionNum dataOrigin polarity precScanNum precursorMz precursorIntensity precursorCharge collisionEnergy isolationWindowLowerMz isolationWindowTargetMz isolationWindowUpperMz peaksCount totIonCurrent basePeakMZ basePeakIntensity ionisationEnergy lowMZ highMZ injectionTime filterString spectrumId scanWindowLowerLimit scanWindowUpperLimit dataStorage scanIndex sample_id sample_name_ sample_group sample_type
1 1.0 0.106081408 1.0 path/sample_1.mzML 0.0 909.0 19614582.0 197.807357788086 2139308.0 0.0 61.9880981445313 755.029235839844 50.0 FTMS - c ESI Full ms [60.0000-900.0000] controllerType=0 controllerNumber=1 scan=1 60.0 900.0 path/sample_1.mzML 1.0 1.0 sample_1 KO QC
2 1.0 0.536452816 2.0 path/sample_2.mzML 0.0 968.0 20253388.0 197.807403564453 2154304.5 0.0 61.9871025085449 665.043334960938 50.0 FTMS - c ESI Full ms [60.0000-900.0000] controllerType=0 controllerNumber=1 scan=2 60.0 900.0 path/sample_2.mzML 2.0 2.0 sample_2 KO study
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id msLevel rtime acquisitionNum dataOrigin centroided polarity precScanNum precursorMz precursorIntensity precursorCharge collisionEnergy isolationWindowLowerMz isolationWindowTargetMz isolationWindowUpperMz peaksCount totIonCurrent basePeakMZ basePeakIntensity ionisationEnergy lowMZ highMZ injectionTime filterString spectrumId scanWindowLowerLimit scanWindowUpperLimit rtime_adjusted dataStorage scanIndex sample_id sample_name_ sample_group sample_type rtime_adjusted-rtime
3 2.0 0.613772143999998 3.0 path/sample_3.mzML True 0.0 1.0 203.081527709961 811281.937988281 0.0 30.0 202.481527709961 203.081527709961 203.681527709961 8.0 73490.75 155.461517333984 9954.2333984375 0.0 66.4943084716797 155.461517333984 22.0 FTMS - c ESI d Full ms2 203.0815@hcd30.00 [52.0000-214.0000] controllerType=0 controllerNumber=1 scan=3 52.0 214.0 0.613772143999998 path/sample_3.mzML 3.0 3.0 sample_3 WT QC 0.0
4 2.0 0.719534496 4.0 path/sample_4.mzML True 0.0 1.0 219.84489440918 152150.111816406 1.0 30.0 219.24489440918 219.84489440918 220.44489440918 4.0 74733.0 219.844863891602 46147.5546875 0.0 90.0349273681641 219.844863891602 22.0 FTMS - c ESI d Full ms2 219.8449@hcd30.00 [53.0000-230.0000] controllerType=0 controllerNumber=1 scan=4 53.0 230.0 0.719534496 path/sample_4.mzML 4.0 4.0 sample_4 WT study 0.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# MsBackendMzR
"msLevel" "rtime" "acquisitionNum" "dataOrigin" "polarity" "precScanNum" "precursorMz" "precursorIntensity" "precursorCharge" "collisionEnergy" "isolationWindowLowerMz" "isolationWindowTargetMz" "isolationWindowUpperMz" "peaksCount" "totIonCurrent" "basePeakMZ" "basePeakIntensity" "ionisationEnergy" "lowMZ" "highMZ" "injectionTime" "filterString" "spectrumId" "scanWindowLowerLimit" "scanWindowUpperLimit" "dataStorage" "scanIndex"
"1" 1 0.106081408 1 "path/sample_1.mzML" 0 NA NA NA NA NA NA NA NA 909 19614582 197.807357788086 2139308 0 61.9880981445313 755.029235839844 50 "FTMS - c ESI Full ms [60.0000-900.0000]" "controllerType=0 controllerNumber=1 scan=1" 60 900 "path/sample_1.mzML" 1
"2" 1 0.536452816 2 "path/sample_2.mzML" 0 NA NA NA NA NA NA NA NA 968 20253388 197.807403564453 2154304.5 0 61.9871025085449 665.043334960938 50 "FTMS - c ESI Full ms [60.0000-900.0000]" "controllerType=0 controllerNumber=1 scan=2" 60 900 "path/sample_2.mzML" 2
"3" 2 0.613772143999998 3 "path/sample_3.mzML" 0 1 203.081527709961 811281.937988281 0 30 202.481527709961 203.081527709961 203.681527709961 8 73490.75 155.461517333984 9954.2333984375 0 66.4943084716797 155.461517333984 22 "FTMS - c ESI d Full ms2 203.0815@hcd30.00 [52.0000-214.0000]" "controllerType=0 controllerNumber=1 scan=3" 52 214 "path/sample_3.mzML" 3
"4" 2 0.719534496 4 "path/sample_4.mzML" 0 1 219.84489440918 152150.111816406 1 30 219.24489440918 219.84489440918 220.44489440918 4 74733 219.844863891602 46147.5546875 0 90.0349273681641 219.844863891602 22 "FTMS - c ESI d Full ms2 219.8449@hcd30.00 [53.0000-230.0000]" "controllerType=0 controllerNumber=1 scan=4" 53 230 "path/sample_4.mzML" 4
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"sample_name" "sample_group" "sample_type" "spectraOrigin"
"1" "sample_1" "KO" "QC" "path/sample_1.mzML"
"2" "sample_2" "KO" "study" "path/sample_2.mzML"
"3" "sample_3" "WT" "QC" "path/sample_3.mzML"
"4" "sample_4" "WT" "study" "path/sample_4.mzML"
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
1 1
2 2
3 3
4 4
63 changes: 63 additions & 0 deletions q2_ms/xcms/tests/test_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2025, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import os

import pandas as pd
from pandas._testing import assert_frame_equal
from qiime2.plugin.testing import TestPluginBase

from q2_ms.types import XCMSExperimentDirFmt
from q2_ms.xcms.metadata import create_spectral_metadata


class TestMetadata(TestPluginBase):
    """Checks ``create_spectral_metadata`` against the bundled expected tables."""

    package = "q2_ms.xcms.tests"

    def _test_create_spectral_metadata_helper(self, obs, exp_metadata):
        """Assert that ``obs`` equals the expected TSV named ``exp_metadata``.

        The expected file is loaded from the ``metadata_expected`` data
        directory and cast to the dtypes used for the comparison.
        """
        expected_path = self.get_data_path(
            os.path.join("metadata_expected", exp_metadata)
        )
        exp = pd.read_csv(expected_path, sep="\t", index_col=0)
        exp.index = exp.index.astype(str)

        # These columns are compared as float64.
        float_columns = [
            "msLevel",
            "acquisitionNum",
            "polarity",
            "peaksCount",
            "ionisationEnergy",
            "injectionTime",
            "scanWindowLowerLimit",
            "scanWindowUpperLimit",
            "scanIndex",
            "sample_id",
        ]
        exp[float_columns] = exp[float_columns].astype("float64")

        # "centroided" is optional and compared as a string column.
        if "centroided" in exp.columns:
            exp["centroided"] = exp["centroided"].astype("str")

        observed_df = obs.to_dataframe()
        assert_frame_equal(observed_df, exp)

    def test_create_spectral_metadata(self):
        """Default MS level on the full fixture."""
        data_dir = self.get_data_path("metadata")
        experiment = XCMSExperimentDirFmt(data_dir, "r")
        result = create_spectral_metadata(experiment)
        self._test_create_spectral_metadata_helper(result, "spectral_metadata_ms1.tsv")

    def test_create_spectral_metadata_no_centroided_rt_adjusted(self):
        """Fixture without the ``centroided`` and ``rtime_adjusted`` columns."""
        data_dir = self.get_data_path("metadata_no_centroided_rt")
        experiment = XCMSExperimentDirFmt(data_dir, "r")
        result = create_spectral_metadata(experiment)
        expected_name = "spectral_metadata_ms1_no_centroided_rt_adjusted.tsv"
        self._test_create_spectral_metadata_helper(result, expected_name)

    def test_create_spectral_metadata_ms2(self):
        """MS level "2" on the full fixture."""
        data_dir = self.get_data_path("metadata")
        experiment = XCMSExperimentDirFmt(data_dir, "r")
        result = create_spectral_metadata(experiment, "2")
        self._test_create_spectral_metadata_helper(result, "spectral_metadata_ms2.tsv")
Loading