diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b3ae6e6..2182937a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added a BAM/CRAM conversion to FASTQ step to run tools that depend on FASTQ files. It still is advised to supply a FASTQ file if possible to reduce runtime of the pipeline. [#783](https://github.com/nf-core/rnafusion/pull/783) +### Fixed + +- Fix issue with vcf_collect when no fusions are found by fusioninspector [#786](https://github.com/nf-core/rnafusion/pull/786) + ## v4.0.0 - [2025-09-10] ### Added diff --git a/bin/vcf_collect.py b/bin/vcf_collect.py index 292c052f..437a1056 100755 --- a/bin/vcf_collect.py +++ b/bin/vcf_collect.py @@ -84,16 +84,14 @@ def vcf_collect( all_df = df.merge( gtf_df, how="left", left_on="CDS_LEFT_ID", right_on="Transcript_id" ) - all_df[["PosA", "orig_start", "orig_end"]] = ( - all_df[["PosA", "orig_start", "orig_end"]].fillna(0).astype(int) - ) all_df = all_df[ ( (all_df["PosA"] >= all_df["orig_start"]) & (all_df["PosA"] <= all_df["orig_end"]) ) - | ((all_df["orig_start"] == 0) & (all_df["orig_end"] == 0)) + | ((all_df["orig_start"].isna()) & (all_df["orig_end"].isna())) + | (all_df["PosA"].isna()) ] all_df["Left_transcript_version"] = all_df["CDS_LEFT_ID"].astype(str).str.split(".").str[-1] @@ -145,21 +143,16 @@ def vcf_collect( all_df = all_df.merge( gtf_df, how="left", left_on="CDS_RIGHT_ID", right_on="Transcript_id" ) - all_df[["PosB", "orig_start", "orig_end"]] = all_df[ - ["PosB", "orig_start", "orig_end"] - ].fillna(0) - all_df[["PosB", "orig_start", "orig_end"]] = all_df[ - ["PosB", "orig_start", "orig_end"] - ].astype(int) + all_df = all_df[ ( (all_df["PosB"] >= all_df["orig_start"]) & (all_df["PosB"] <= all_df["orig_end"]) ) - | ((all_df["orig_start"] == 0) & (all_df["orig_end"] == 0)) + | ((all_df["orig_start"].isna()) & (all_df["orig_end"].isna())) + | (all_df["PosB"].isna()) ] - all_df[["PosA", "PosB"]] = all_df[["PosA", "PosB"]].replace(0, np.nan) all_df = all_df.replace("", np.nan) all_df["Right_transcript_version"] = all_df["CDS_RIGHT_ID"].astype(str).str.split(".").str[-1] @@ -213,6 +206,9 @@ def vcf_collect( all_df = all_df.combine_first(read_fusionreport_csv(fusionreport_csv)) + # Filter out invalid entries with missing positional values + all_df = all_df[all_df["PosA"].notna() & all_df["PosB"].notna() & all_df["ChromosomeA"].notna() & all_df["ChromosomeB"].notna()] + return write_vcf(column_manipulation(all_df), header_def(sample), out_file) diff --git a/modules/local/vcf_collect/tests/main.nf.test.snap b/modules/local/vcf_collect/tests/main.nf.test.snap index 1e203d0c..9b912c67 100644 --- a/modules/local/vcf_collect/tests/main.nf.test.snap +++ b/modules/local/vcf_collect/tests/main.nf.test.snap @@ -61,8 +61,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nextflow": "25.10.2" }, - "timestamp": "2025-08-12T20:45:25.283350308" + "timestamp": "2026-01-08T17:24:54.112204263" } -} +} \ No newline at end of file