diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 6b9a6d5e9..85728bcce 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -33,7 +33,7 @@ jobs: parameters: | { "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ steps.revision.outputs.revision }}" + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ steps.revision.outputs.revision }}", "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/references", "tools": "all", "no_cosmic": true diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index e796a814c..82ba8fdfc 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -64,7 +64,7 @@ jobs: runs-on: # use self-hosted runners - runs-on=${{ github.run_id }}-nf-test - runner=4cpu-linux-x64 - - disk=large + - volume=80gb strategy: fail-fast: false matrix: diff --git a/.nf-core.yml b/.nf-core.yml index 4f80b3e13..b16d2b028 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -16,4 +16,4 @@ template: name: rnafusion org: nf-core outdir: . - version: 4.0.0 + version: 4.1.0 diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e1eb9277..75601948e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v4.1.0 - [2026-02-09] + +### Added + +- Added a BAM/CRAM conversion to FASTQ step to run tools that depend on FASTQ files. It still is advised to supply a FASTQ file if possible to reduce runtime of the pipeline. 
[#783](https://github.com/nf-core/rnafusion/pull/783) + +### Fixed + +- Fix issue with vcf_collect when no fusions are found by fusioninspector [#786](https://github.com/nf-core/rnafusion/pull/786) + ## v4.0.0 - [2025-09-10] ### Added diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 73d0edd30..78f4441b2 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,8 +1,5 @@ report_comment: > - This report has been generated by the nf-core/rnafusion analysis pipeline. For information about how - to interpret these results, please see the documentation. + This report has been generated by the nf-core/rnafusion analysis pipeline. For information about how to interpret these results, please see the documentation. report_section_order: nf-core-rnafusion-methods-description: diff --git a/assets/schema_input.json b/assets/schema_input.json index c0ff9aea6..3d8005611 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -32,6 +32,7 @@ "format": "file-path", "exists": true, "pattern": "^\\S+\\.bam$", + "description": "BAM file created with STAR. This file should be created from paired-end reads and junctions should be written out separately.", "errorMessage": "BAM file cannot contain spaces, has to exist and must have extension '.bam'" }, "bai": { @@ -46,6 +47,7 @@ "format": "file-path", "exists": true, "pattern": "^\\S+\\.cram$", + "description": "CRAM file created with STAR. This file should be created from paired-end reads and junctions should be written out separately.", "errorMessage": "CRAM file cannot contain spaces, has to exist and must have extension '.cram'" }, "crai": { @@ -60,6 +62,7 @@ "format": "file-path", "exists": true, "pattern": "^\\S+\\.junction$", + "description": "Junctions file created with STAR. 
This file should be created from paired-end reads.", "errorMessage": "Junctions file cannot contain spaces, has to exist and must have extension '.junction'" }, "splice_junctions": { @@ -67,6 +70,7 @@ "format": "file-path", "exists": true, "pattern": "^\\S+\\.SJ.out.tab$", + "description": "Splice junctions file created with STAR. This file should be created from paired-end reads.", "errorMessage": "Split junctions file cannot contain spaces, has to exist and must have extension '.SJ.out.tab'" }, "strandedness": { diff --git a/bin/vcf_collect.py b/bin/vcf_collect.py index 292c052fd..437a1056e 100755 --- a/bin/vcf_collect.py +++ b/bin/vcf_collect.py @@ -84,16 +84,14 @@ def vcf_collect( all_df = df.merge( gtf_df, how="left", left_on="CDS_LEFT_ID", right_on="Transcript_id" ) - all_df[["PosA", "orig_start", "orig_end"]] = ( - all_df[["PosA", "orig_start", "orig_end"]].fillna(0).astype(int) - ) all_df = all_df[ ( (all_df["PosA"] >= all_df["orig_start"]) & (all_df["PosA"] <= all_df["orig_end"]) ) - | ((all_df["orig_start"] == 0) & (all_df["orig_end"] == 0)) + | ((all_df["orig_start"].isna()) & (all_df["orig_end"].isna())) + | (all_df["PosA"].isna()) ] all_df["Left_transcript_version"] = all_df["CDS_LEFT_ID"].astype(str).str.split(".").str[-1] @@ -145,21 +143,16 @@ def vcf_collect( all_df = all_df.merge( gtf_df, how="left", left_on="CDS_RIGHT_ID", right_on="Transcript_id" ) - all_df[["PosB", "orig_start", "orig_end"]] = all_df[ - ["PosB", "orig_start", "orig_end"] - ].fillna(0) - all_df[["PosB", "orig_start", "orig_end"]] = all_df[ - ["PosB", "orig_start", "orig_end"] - ].astype(int) + all_df = all_df[ ( (all_df["PosB"] >= all_df["orig_start"]) & (all_df["PosB"] <= all_df["orig_end"]) ) - | ((all_df["orig_start"] == 0) & (all_df["orig_end"] == 0)) + | ((all_df["orig_start"].isna()) & (all_df["orig_end"].isna())) + | (all_df["PosB"].isna()) ] - all_df[["PosA", "PosB"]] = all_df[["PosA", "PosB"]].replace(0, np.nan) all_df = all_df.replace("", np.nan) 
all_df["Right_transcript_version"] = all_df["CDS_RIGHT_ID"].astype(str).str.split(".").str[-1] @@ -213,6 +206,9 @@ def vcf_collect( all_df = all_df.combine_first(read_fusionreport_csv(fusionreport_csv)) + # Filter out invalid entries with missing positional values + all_df = all_df[all_df["PosA"].notna() & all_df["PosB"].notna() & all_df["ChromosomeA"].notna() & all_df["ChromosomeB"].notna()] + return write_vcf(column_manipulation(all_df), header_def(sample), out_file) diff --git a/docs/usage.md b/docs/usage.md index 0ce3485be..03a4dbaab 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -140,8 +140,8 @@ TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,,forward | ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | | `sample` | Custom sample name. This value needs to be unique across all entries in the samplesheet and cannot contain spaces | ✅ | | `strandedness` | Strandedness: forward or reverse. | ✅ | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File must exist, has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". It's recommended to always provide the FASTQ files because the pipeline will be able to create any missing files from these. The FASTQ files are required to run `salmon`, `fusioninspector` and `fusioncatcher`. | ❓ | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File must exist, has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". It's recommended to always provide the FASTQ files because the pipeline will be able to create any missing files from these. 
The FASTQ files are required to run `salmon`, `fusioninspector` and `fusioncatcher`". | ❓ | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File must exist, has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". It's recommended to always provide the FASTQ files because the pipeline will be able to create any missing files from these. These should be supplied to let the pipeline run faster, but they can be automatically created from the available BAM or CRAM files. | ❓ | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File must exist, has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". It's recommended to always provide the FASTQ files because the pipeline will be able to create any missing files from these. These should be supplied to let the pipeline run faster, but they can be automatically created from the available BAM or CRAM files. | ❓ | | `bam` | Full path to the BAM file created with STAR. File has to exist and must have the extension ".bam". It's the responsibility of the pipeline user to make sure this file has been correctly created, see the [prepare chapter](#preparing-bamcramjunctionssplice_junctions) for more information. The BAM file is required to run `ctatsplicing`, `stringtie`, `fusioninspector` and `arriba` when the `fastq_1` and `cram` fields are empty. | ❓ | | `bai` | Full path to the index of the BAM file. File has to exist and must have the extension ".bai". | ❌ | | `cram` | Full path to the CRAM file created with STAR. File has to exist and must have the extension ".cram". It's the responsibility of the pipeline user to make sure this file has been correctly created, see the [prepare chapter](#preparing-bamcramjunctionssplice_junctions) for more information. The CRAM file is required to run `ctatsplicing`, `stringtie`, `fusioninspector` and `arriba` when the `fastq_1` and `bam` fields are empty. 
| ❓ | diff --git a/modules.json b/modules.json index b1ddae700..a5bcbdb14 100644 --- a/modules.json +++ b/modules.json @@ -133,6 +133,11 @@ "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", "installed_by": ["modules"] }, + "samtools/collatefastq": { + "branch": "master", + "git_sha": "c8be52dba1166c678e74cda9c3a3c221635c8bb1", + "installed_by": ["modules"] + }, "samtools/convert": { "branch": "master", "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", diff --git a/modules/local/vcf_collect/tests/main.nf.test.snap b/modules/local/vcf_collect/tests/main.nf.test.snap index 1e203d0c1..9b912c676 100644 --- a/modules/local/vcf_collect/tests/main.nf.test.snap +++ b/modules/local/vcf_collect/tests/main.nf.test.snap @@ -61,8 +61,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nextflow": "25.10.2" }, - "timestamp": "2025-08-12T20:45:25.283350308" + "timestamp": "2026-01-08T17:24:54.112204263" } -} +} \ No newline at end of file diff --git a/modules/nf-core/samtools/collatefastq/environment.yml b/modules/nf-core/samtools/collatefastq/environment.yml new file mode 100644 index 000000000..89e12a645 --- /dev/null +++ b/modules/nf-core/samtools/collatefastq/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/collatefastq/main.nf b/modules/nf-core/samtools/collatefastq/main.nf new file mode 100644 index 000000000..95a84b5bf --- /dev/null +++ b/modules/nf-core/samtools/collatefastq/main.nf @@ -0,0 +1,76 @@ +process SAMTOOLS_COLLATEFASTQ { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" + + input: + tuple val(meta), path(input) + tuple val(meta2), path(fasta) + val(interleave) + + output: + tuple val(meta), path("*_{1,2}.fq.gz") , optional:true, emit: fastq + tuple val(meta), path("*_interleaved.fq") , optional:true, emit: fastq_interleaved + tuple val(meta), path("*_other.fq.gz") , emit: fastq_other + tuple val(meta), path("*_singleton.fq.gz") , optional:true, emit: fastq_singleton + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + def output = (interleave && ! meta.single_end) ? "> ${prefix}_interleaved.fq" : + meta.single_end ? "-1 ${prefix}_1.fq.gz -s ${prefix}_singleton.fq.gz" : + "-1 ${prefix}_1.fq.gz -2 ${prefix}_2.fq.gz -s ${prefix}_singleton.fq.gz" + + """ + samtools collate \\ + $args \\ + --threads $task.cpus \\ + ${reference} \\ + -O \\ + $input \\ + . | + + samtools fastq \\ + $args2 \\ + --threads $task.cpus \\ + ${reference} \\ + -0 ${prefix}_other.fq.gz \\ + $output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def empty = "echo '' | gzip " + def singletoncommand = "${empty}> ${prefix}_singleton.fq.gz" + def interleavecommand = interleave && !meta.single_end ? "${empty}> ${prefix}_interleaved.fq.gz" : "" + def output1command = !interleave ? "${empty}> ${prefix}_1.fq.gz" : "" + def output2command = !interleave && !meta.single_end ? 
"${empty}> ${prefix}_2.fq.gz" : "" + + """ + ${output1command} + ${output2command} + ${interleavecommand} + ${singletoncommand} + ${empty}> ${prefix}_other.fq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/collatefastq/meta.yml b/modules/nf-core/samtools/collatefastq/meta.yml new file mode 100644 index 000000000..10325a614 --- /dev/null +++ b/modules/nf-core/samtools/collatefastq/meta.yml @@ -0,0 +1,109 @@ +name: samtools_collatefastq +description: | + The module uses collate and then fastq methods from samtools to + convert a SAM, BAM or CRAM file to FASTQ format +keywords: + - bam2fq + - samtools + - fastq +tools: + - samtools: + description: Tools for dealing with SAM, BAM and CRAM files + homepage: "http://www.htslib.org" + documentation: "https://www.htslib.org/doc/samtools.html" + tool_dev_url: "https://github.com/samtools/samtools" + doi: "10.1093/bioinformatics/btp352" + licence: ["MIT"] + identifier: biotools:samtools + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" + ontologies: [] + - interleave: + type: boolean + description: | + If true, the output is a single interleaved paired-end FASTQ + If false, the output split paired-end FASTQ + default: false +output: + fastq: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*_{1,2}.fq.gz": + type: file + description: | + R1 and R2 FASTQ files + pattern: "*_{1,2}.fq.gz" + ontologies: [] + fastq_interleaved: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_interleaved.fq": + type: file + description: | + Interleaved paired end FASTQ files + pattern: "*_interleaved.fq.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + fastq_other: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_other.fq.gz": + type: file + description: | + FASTQ files with reads where the READ1 and READ2 FLAG bits set are either both set or both unset. + pattern: "*_other.fq.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + fastq_singleton: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_singleton.fq.gz": + type: file + description: | + FASTQ files with singleton reads. 
+ pattern: "*_singleton.fq.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@lescai" + - "@maxulysse" + - "@matthdsm" diff --git a/modules/nf-core/samtools/collatefastq/tests/main.nf.test b/modules/nf-core/samtools/collatefastq/tests/main.nf.test new file mode 100644 index 000000000..bc66ebf6e --- /dev/null +++ b/modules/nf-core/samtools/collatefastq/tests/main.nf.test @@ -0,0 +1,242 @@ +nextflow_process { + + name "Test Process SAMTOOLS_COLLATEFASTQ" + script "../main.nf" + process "SAMTOOLS_COLLATEFASTQ" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/collatefastq" + + test("human - bam - paired_end") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.fastq, + process.out.versions).match() } + ) + } + + } + + test("human - bam - single_end") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.fastq_other, + process.out.versions).match() } + ) 
+ } + + } + + test("human - cram") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ] + input[1] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.fastq, + process.out.versions).match() } + ) + } + + } + + test("human - bam - paired_end - interleaved") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.fastq_interleaved, + process.out.fastq_singleton, + process.out.fastq, + process.out.versions).match() } + ) + } + + } + + test("human - bam - paired_end -stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.fastq, + process.out.versions).match() } + ) + } + + } + + test("human - bam - single_end - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ 
id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.fastq_other, + process.out.versions).match() } + ) + } + + } + + test("human - cram - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ] + input[1] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.fastq, + process.out.versions).match() } + ) + } + + } + + test("human - bam - paired_end - interleaved - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.fastq_interleaved, + process.out.fastq_singleton, + process.out.fastq, + process.out.versions).match() } + ) + } + + } + + + +} diff --git a/modules/nf-core/samtools/collatefastq/tests/main.nf.test.snap b/modules/nf-core/samtools/collatefastq/tests/main.nf.test.snap new file mode 100644 index 000000000..77eb11808 --- 
/dev/null +++ b/modules/nf-core/samtools/collatefastq/tests/main.nf.test.snap @@ -0,0 +1,194 @@ +{ + "human - bam - paired_end - interleaved - stub": { + "content": [ + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_singleton.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + + ], + [ + "versions.yml:md5,c19c4bbd5d3ee480626a02774e2851e5" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-10T13:04:12.998" + }, + "human - bam - single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_other.fq.gz:md5,a6c101a06b5c9d5f8b91c0acd4ac5045" + ] + ], + [ + "versions.yml:md5,c19c4bbd5d3ee480626a02774e2851e5" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-10T13:03:43.667571" + }, + "human - bam - paired_end -stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + [ + "versions.yml:md5,c19c4bbd5d3ee480626a02774e2851e5" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-10T13:03:59.764256" + }, + "human - cram - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + [ + "versions.yml:md5,c19c4bbd5d3ee480626a02774e2851e5" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-10T13:04:08.94506" + }, + "human - cram": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fq.gz:md5,1cf671980643af6c1148ae5e8e94e350", + "test_2.fq.gz:md5,38c1e9829115f9025f95435c5a4373d3" + ] + ] + ], + [ + "versions.yml:md5,c19c4bbd5d3ee480626a02774e2851e5" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + 
"timestamp": "2025-09-10T13:03:49.929926" + }, + "human - bam - paired_end - interleaved": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_interleaved.fq:md5,4f2b93d492f0442fa89b02532c9b3530" + ] + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,c19c4bbd5d3ee480626a02774e2851e5" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-10T13:03:55.67649" + }, + "human - bam - single_end - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_other.fq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + "versions.yml:md5,c19c4bbd5d3ee480626a02774e2851e5" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-10T13:04:03.902512" + }, + "human - bam - paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fq.gz:md5,1cf671980643af6c1148ae5e8e94e350", + "test_2.fq.gz:md5,38c1e9829115f9025f95435c5a4373d3" + ] + ] + ], + [ + "versions.yml:md5,c19c4bbd5d3ee480626a02774e2851e5" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-10T13:03:37.311403" + } +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 8f5370a58..0872f4f9b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -356,8 +356,8 @@ manifest { mainScript = 'main.nf' defaultBranch = 'master' nextflowVersion = '!>=24.10.5' - version = '4.0.0' - doi = '' + version = '4.1.0' + doi = '10.5281/zenodo.17641566' } // Nextflow plugins diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index ee93551dc..6cf8fecb1 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -22,7 +22,7 @@ "@id": "./", "@type": "Dataset", "creativeWorkStatus": "Stable", - "datePublished": "2025-09-10T11:04:42+00:00", + "datePublished": "2026-02-09T08:51:15+00:00", "description": "

\n \n \n \"nf-core/rnafusion\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/rnafusion/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/rnafusion/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/rnafusion/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/rnafusion/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/rnafusion/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.2565517-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.2565517)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/rnafusion)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23rnafusion-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/rnafusion)[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/rnafusion** is a bioinformatics best-practice analysis pipeline for RNA sequencing consisting of several tools designed for detecting and visualizing fusion genes. Results from fusion callers tools (STAR-Fusion, arriba and FusionCatcher) are created, and are also aggregated, most notably in a pdf visualisation document, a vcf data collection file, and html and tsv reports. In parallel StringTie and CTAT-Splicing collect additional information on splicing events.\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/rnafusion/results).\n\nIn rnafusion the full-sized test includes reference building and fusion detection. 
The test dataset is taken from [here](https://github.com/nf-core/test-datasets/tree/rnafusion/testdata/human).\n\n## Pipeline summary\n\n![nf-core/rnafusion metro map](docs/images/nf-core-rnafusion_metro_map.png)\n\n### References\n\nThe references for the pipeline can be downloaded from the nf-core AWS megatests S3 bucket using the following command for the [AWS CLI tool](https://github.com/aws/aws-cli):\n\n```bash\naws --no-sign-request s3 sync s3://nf-core-awsmegatests/rnafusion/references/ \n```\n\nThe path to the downloaded references can then be provided to the pipeline with the `--genomes_base` parameter.\n\n\u26a0\ufe0f **Please note that the references are large and can take a long time to download, so it is recommended to download them once and use them for all future runs of the pipeline.**\n\nThe pipeline is also able to build the references in case files from a specific gencode version are missing (Note: only gencode 46 is available for fusioncatcher). This is done automatically when the expected references are not found and these files will be automatically published in the directory specified by the `--genomes_base` parameter. Use the `--references_only` parameter to trigger the reference building workflow only, without running the rest of the pipeline.\n\n1. Download gencode fasta and gtf files\n2. Download the HGNC nomenclature file\n3. Create files needed for QC (Sequence Dictionary and RRNA intervals)\n4. Convert the gtf file to a refflat file\n5. Create the [Salmon](https://salmon.readthedocs.io/en/latest/) index\n6. Create [STAR](https://github.com/alexdobin/STAR) index\n7. Build [STAR-Fusion](https://github.com/STAR-Fusion/STAR-Fusion) and [CTAT-SPLICING](https://github.com/TrinityCTAT/CTAT-SPLICING) references\n8. 
Download [Fusion-report](https://github.com/Clinical-Genomics/fusion-report) DBs\n\n> [!WARNING]\n> References for Fusioncatcher and Arriba cannot be automatically created by the pipeline and should be downloaded from the S3 bucket or another source. See the [References](#references) section for more information.\n\n#### Main workflow\n\n1. Input samplesheet check\n2. Reads quality control ([FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n3. Optional trimming with [fastp](https://github.com/OpenGene/fastp)\n4. Align FASTQs to BAM with [STAR](https://github.com/alexdobin/STAR)\n5. Run fusion detection with [Arriba](https://github.com/suhrig/arriba)\n6. Run fusion detection with [STAR-Fusion](https://github.com/STAR-Fusion/STAR-Fusion)\n 7a. Optional trimming of 3' end with [fastp](https://github.com/OpenGene/fastp) to feed into fusioncatcher (other tools not affected)\n 7b. Run fusion detection with [FusionCatcher](https://github.com/ndaniel/fusioncatcher)\n7. Run transcript assembly and quantification with [StringTie](https://ccb.jhu.edu/software/stringtie/)\n8. Run cancer splicing aberrations detection with [CTAT-SPLICING](https://github.com/TrinityCTAT/CTAT-SPLICING)\n9. Merge all fusions detected by the selected tools with [Fusion-report](https://github.com/Clinical-Genomics/fusion-report)\n10. Post-processing and analysis of data\n - [FusionInspector](https://github.com/FusionInspector/FusionInspector)\n - Summarize information into a VCF file\n - [Arriba](https://github.com/suhrig/arriba) visualisation\n - Collect metrics ([`picard CollectRnaSeqMetrics`](https://gatk.broadinstitute.org/hc/en-us/articles/360037057492-CollectRnaSeqMetrics-Picard-)), [`picard CollectInsertSizeMetrics`](https://gatk.broadinstitute.org/hc/en-us/articles/360037055772-CollectInsertSizeMetrics-Picard-) and ([`GATK MarkDuplicates`](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-))\n11. 
Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n12. Compress bam files to cram with [samtools view](http://www.htslib.org/)\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nAs the reference building is computationally heavy (> 24h on HPC), we had to use dummy reference files in the test profile. Therefore, it is recommended to run the test profile with the `-stub` option.\n\n```bash\nnextflow run nf-core/rnafusion \\\n -profile test,<docker/singularity/.../institute> \\\n --outdir <OUTDIR> \\\n -stub\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\n> **Notes:**\n>\n> - Conda is not currently supported; run with singularity or docker.\n> - Paths need to be absolute.\n> - GRCh38 is the only supported reference.\n> - Single-end reads are to be used as a last resort. Paired-end reads are recommended. 
FusionCatcher cannot be used with single-end reads shorter than 130 bp.\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/rnafusion/usage) and the [parameter documentation](https://nf-co.re/rnafusion/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/rnafusion/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/rnafusion/output).\n\n## Credits\n\nnf-core/rnafusion was written by Martin Proks ([@matq007](https://github.com/matq007)), Maxime Garcia ([@maxulysse](https://github.com/maxulysse)) and Annick Renevey ([@rannick](https://github.com/rannick))\n\n## We thank the following people for their help in the development of this pipeline\n\n- [Phil Ewels](https://github.com/ewels)\n- [Rickard Hammar\u00e9n](https://github.com/Hammarn)\n- [Alexander Peltzer](https://github.com/apeltzer)\n- [Praveen Raj](https://github.com/praveenraj2018)\n- [Anabella Trigila](https://github.com/atrigila)\n- [Nicolas Vannieuwkerke](https://github.com/nvnieuwk)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#rnafusion` channel](https://nfcore.slack.com/channels/rnafusion) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/rnafusion for your analysis, please cite it using the following doi: [10.5281/zenodo.3946477](https://doi.org/10.5281/zenodo.3946477)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics 
pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { @@ -108,7 +108,7 @@ }, "mentions": [ { - "@id": "#22caf67a-5268-45f3-acca-9cff28a06eda" + "@id": "#873aa31f-c681-4833-8005-eee6abefc308" } ], "name": "nf-core/rnafusion" @@ -137,26 +137,26 @@ ], "creator": [ { - "@id": "#max.u.garcia@gmail.com" + "@id": "https://orcid.org/0000-0001-9017-591X" }, { - "@id": "https://orcid.org/0000-0001-7411-6063" + "@id": "https://orcid.org/0009-0003-5619-1555" }, { - "@id": "https://orcid.org/0000-0002-8178-3128" + "@id": "https://orcid.org/0000-0001-7411-6063" }, { - "@id": "#phil.ewels@scilifelab.se" + "@id": "#max.u.garcia@gmail.com" }, { - "@id": "https://orcid.org/0009-0003-5619-1555" + "@id": "#phil.ewels@scilifelab.se" }, { - "@id": "https://orcid.org/0000-0001-9017-591X" + "@id": "https://orcid.org/0000-0002-8178-3128" } ], "dateCreated": "", - "dateModified": "2025-09-10T13:04:42Z", + "dateModified": "2026-02-09T09:51:15Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "image": { "@id": "docs/images/nf-core-rnafusion_metro_map.png" @@ -175,13 +175,13 @@ ], "maintainer": [ { - "@id": "https://orcid.org/0000-0001-7411-6063" + "@id": "https://orcid.org/0009-0003-5619-1555" }, { - "@id": "https://orcid.org/0000-0002-8178-3128" + "@id": "https://orcid.org/0000-0001-7411-6063" }, { - "@id": "https://orcid.org/0009-0003-5619-1555" + "@id": "https://orcid.org/0000-0002-8178-3128" } ], "name": [ @@ -195,10 +195,10 @@ }, "url": [ "https://github.com/nf-core/rnafusion", - "https://nf-co.re/rnafusion/4.0.0/" + "https://nf-co.re/rnafusion/4.1.0/" ], "version": [ - "4.0.0" + "4.1.0" ] }, { @@ -222,11 +222,11 @@ "name": "Workflow diagram" }, { - "@id": "#22caf67a-5268-45f3-acca-9cff28a06eda", + 
"@id": "#873aa31f-c681-4833-8005-eee6abefc308", "@type": "TestSuite", "instance": [ { - "@id": "#ee1eeb2c-e0a3-45fc-8770-0e80c9d1489b" + "@id": "#0ac00120-f824-4d74-8494-e99c2d6cbfa1" } ], "mainEntity": { @@ -235,7 +235,7 @@ "name": "Test suite for nf-core/rnafusion" }, { - "@id": "#ee1eeb2c-e0a3-45fc-8770-0e80c9d1489b", + "@id": "#0ac00120-f824-4d74-8494-e99c2d6cbfa1", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/rnafusion", "resource": "repos/nf-core/rnafusion/actions/workflows/nf-test.yml", @@ -374,10 +374,16 @@ "url": "https://nf-co.re/" }, { - "@id": "#max.u.garcia@gmail.com", + "@id": "https://orcid.org/0000-0001-9017-591X", "@type": "Person", - "email": "max.u.garcia@gmail.com", - "name": "Maxime Garcia" + "email": "rickard.hammaren@scilifelab.se", + "name": "Rickard Hammar\u00e9n" + }, + { + "@id": "https://orcid.org/0009-0003-5619-1555", + "@type": "Person", + "email": "101190534+nvnieuwk@users.noreply.github.com", + "name": "Nicolas Vannieuwkerke" }, { "@id": "https://orcid.org/0000-0001-7411-6063", @@ -386,10 +392,10 @@ "name": "Annick Renevey" }, { - "@id": "https://orcid.org/0000-0002-8178-3128", + "@id": "#max.u.garcia@gmail.com", "@type": "Person", - "email": "mproksik@gmail.com", - "name": "Martin Proks" + "email": "max.u.garcia@gmail.com", + "name": "Maxime Garcia" }, { "@id": "#phil.ewels@scilifelab.se", @@ -398,16 +404,10 @@ "name": "Phil Ewels" }, { - "@id": "https://orcid.org/0009-0003-5619-1555", - "@type": "Person", - "email": "101190534+nvnieuwk@users.noreply.github.com", - "name": "Nicolas Vannieuwkerke" - }, - { - "@id": "https://orcid.org/0000-0001-9017-591X", + "@id": "https://orcid.org/0000-0002-8178-3128", "@type": "Person", - "email": "rickard.hammaren@scilifelab.se", - "name": "Rickard Hammar\u00e9n" + "email": "mproksik@gmail.com", + "name": "Martin Proks" } ] } \ No newline at end of file diff --git a/tests/test_build.nf.test.snap b/tests/test_build.nf.test.snap index 8eb7bdc62..1aef147e5 100644 --- 
a/tests/test_build.nf.test.snap +++ b/tests/test_build.nf.test.snap @@ -47,7 +47,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ diff --git a/tests/test_stub.nf.test.snap b/tests/test_stub.nf.test.snap index 65d5a3c2d..438727778 100644 --- a/tests/test_stub.nf.test.snap +++ b/tests/test_stub.nf.test.snap @@ -71,7 +71,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -308,7 +308,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -469,7 +469,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -643,7 +643,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -874,7 +874,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -1006,7 +1006,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -1270,7 +1270,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -1493,7 +1493,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -1570,7 +1570,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -1645,7 +1645,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -1808,7 +1808,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ diff --git a/tests/test_stub_bam.nf.test b/tests/test_stub_bam.nf.test index 036540242..0e397d56b 100644 --- a/tests/test_stub_bam.nf.test +++ b/tests/test_stub_bam.nf.test @@ -405,4 +405,44 @@ nextflow_pipeline { ) } } + + test("default no fastqs provided") { + + when { + params { + outdir = "$outputDir" 
+ fusion_annot_lib = 'https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz' + input = "${projectDir}/tests/yml/cram_only.yml" + tools = "all" + no_cosmic = true + genomes_base = "${outputDir}/references" + arriba_ref_protein_domains = "https://github.com/nf-core/test-datasets/raw/refs/heads/rnafusion/testdata/reference/arriba/protein_domains_hg38_GRCh38_v2.5.0.gff3" + arriba_ref_known_fusions = "https://github.com/nf-core/test-datasets/raw/refs/heads/rnafusion/testdata/reference/arriba/known_fusions_hg38_GRCh38_v2.5.0.tsv.gz" + arriba_ref_blacklist = "https://github.com/nf-core/test-datasets/raw/refs/heads/rnafusion/testdata/reference/arriba/blacklist_hg38_GRCh38_v2.5.0.tsv.gz" + arriba_ref_cytobands = "https://github.com/nf-core/test-datasets/raw/refs/heads/rnafusion/testdata/reference/arriba/cytobands_hg38_GRCh38_v2.5.0.tsv" + fusioncatcher_ref = "${projectDir}/assets" // This is a placeholder until a small reference can be created + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + // Ignore files with timestamps in their names + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + // def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') //TODO uncomment once -stub is removed + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_rnafusion_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + // stable_path // TODO uncomment once -stub is removed + 
).match() } + ) + } + } } diff --git a/tests/test_stub_bam.nf.test.snap b/tests/test_stub_bam.nf.test.snap index c895d618b..23a0dd651 100644 --- a/tests/test_stub_bam.nf.test.snap +++ b/tests/test_stub_bam.nf.test.snap @@ -51,7 +51,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -243,7 +243,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -362,7 +362,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -491,7 +491,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -710,7 +710,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -831,7 +831,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -1053,7 +1053,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -1261,7 +1261,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -1357,7 +1357,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -1503,7 +1503,7 @@ "ucsc": 447 }, "Workflow": { - "nf-core/rnafusion": "v4.0.0" + "nf-core/rnafusion": "v4.1.0" } }, [ @@ -1732,8 +1732,346 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.04.7" + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-17T10:22:39.779724829" + }, + "default no fastqs provided": { + "content": [ + 32, + { + "AGAT_CONVERTGFF2BED": { + "agat": "v1.5.1" + }, + "ARRIBA_ARRIBA": { + "arriba": "2.5.0" + }, + "BIOAWK": { + "bioawk": 1.0 + }, + "CTATSPLICING_PREPGENOMELIB": { + "ctatsplicing": "0.0.2" + }, + "CTATSPLICING_STARTOCANCERINTRONS": { + "ctat-splicing": "0.0.2" + }, + "FASTP": { + "fastp": "0.24.0" + }, + "FASTQC_RAW": { + "fastqc": "0.12.1" + }, + 
"FASTQC_TRIM": { + "fastqc": "0.12.1" + }, + "FUSIONCATCHER_FUSIONCATCHER": { + "fusioncatcher": "1.35" + }, + "FUSIONINSPECTOR": { + "FusionInspector": "2.10.0" + }, + "FUSIONREPORT_DETECT": { + "fusion_report": "4.0.1" + }, + "FUSIONREPORT_DOWNLOAD": { + "fusion_report": "4.0.1" + }, + "GATK4_BEDTOINTERVALLIST": { + "gatk4": "4.6.1.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "GENCODE_DOWNLOAD": { + "wget": "1.21.4" + }, + "GFFREAD": { + "gffread": "0.12.7" + }, + "HGNC_DOWNLOAD": { + "wget": "1.21.4" + }, + "PICARD_COLLECTINSERTSIZEMETRICS": { + "picard": "3.3.0" + }, + "PICARD_COLLECTRNASEQMETRICS": { + "picard": "3.3.0" + }, + "SALMON_INDEX": { + "salmon": "1.10.3" + }, + "SALMON_QUANT": { + "salmon": "1.10.3" + }, + "SAMTOOLS_COLLATEFASTQ": { + "samtools": "1.22.1" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.21 + }, + "STARFUSION_BUILD": { + "gunzip": 1.12, + "hmmer": 3.4, + "STAR-Fusion": "1.15.1" + }, + "STARFUSION_DETECT": { + "STAR-Fusion": "1.15.1" + }, + "STAR_GENOMEGENERATE": { + "star": "2.7.11b", + "samtools": 1.21, + "gawk": "5.1.0" + }, + "STRINGTIE_MERGE": { + "stringtie": "2.2.1" + }, + "STRINGTIE_STRINGTIE": { + "stringtie": "2.2.3" + }, + "UCSC_GTFTOGENEPRED": { + "ucsc": 447 + }, + "Workflow": { + "nf-core/rnafusion": "v4.1.0" + } + }, + [ + "agat", + "agat/Homo_sapiens.GRCh38.46.gtf.bed", + "arriba", + "arriba/test.arriba.fusions.discarded.tsv", + "arriba/test.arriba.fusions.tsv", + "bioawk", + "bioawk/Homo_sapiens.GRCh38.46.gtf_rrna.gtf.gz", + "ctatsplicing", + "ctatsplicing/test.cancer.introns", + "ctatsplicing/test.cancer.introns.prelim", + "ctatsplicing/test.cancer_intron_reads.sorted.bam", + "ctatsplicing/test.cancer_intron_reads.sorted.bam.bai", + "ctatsplicing/test.chckpts", + "ctatsplicing/test.ctat-splicing.igv.html", + "ctatsplicing/test.gene_reads.sorted.sifted.bam", + "ctatsplicing/test.gene_reads.sorted.sifted.bam.bai", + 
"ctatsplicing/test.igv.tracks", + "ctatsplicing/test.introns", + "ctatsplicing/test.introns.for_IGV.bed", + "fastp", + "fastp/test.fastp.html", + "fastp/test.fastp.json", + "fastp/test.fastp.log", + "fastp/test_1.fastp.fastq.gz", + "fastp/test_2.fastp.fastq.gz", + "fastqc", + "fastqc/test.html", + "fastqc/test.zip", + "fastqc_for_fastp", + "fastqc_for_fastp/test_trimmed.html", + "fastqc_for_fastp/test_trimmed.zip", + "fusioncatcher", + "fusioncatcher/test.fusion-genes.txt", + "fusioncatcher/test.log", + "fusioncatcher/test.summary.txt", + "fusioninspector", + "fusioninspector/test", + "fusioninspector/test/FusionInspector.log", + "fusioninspector/test/IGV_inputs", + "fusioninspector/test/chckpts_dir", + "fusioninspector/test/fi_workdir", + "fusioninspector/test/fi_workdir/test.gtf", + "fusioninspector/test/test.FusionInspector.fusions.abridged.tsv", + "fusioninspector/test/test.FusionInspector.fusions.tsv", + "fusioninspector/test/test.fusion_inspector_web.html", + "fusionreport", + "fusionreport/test", + "fusionreport/test/AAA_BBB.html", + "fusionreport/test/test.fusionreport.tsv", + "fusionreport/test/test.fusionreport_filtered.tsv", + "fusionreport/test/test.fusions.csv", + "fusionreport/test/test.fusions.json", + "fusionreport/test/test_fusionreport_index.html", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "picard", + "picard/test.bai", + "picard/test.bam", + "picard/test.bam.metrics", + "picard/test.cram", + "picard/test.cram.crai", + "picard/test.rna_metrics", + "picard/test_collectinsertsize.pdf", + "picard/test_collectinsertsize.txt", + "pipeline_info", + "pipeline_info/nf_core_rnafusion_software_mqc_versions.yml", + "references", + "references/GRCh38", + "references/GRCh38/fusion_report_db", + "references/GRCh38/fusion_report_db/DB-timestamp.txt", + "references/GRCh38/fusion_report_db/cosmic.db", + "references/GRCh38/fusion_report_db/fusion_report.log", + 
"references/GRCh38/fusion_report_db/fusiongdb2.db", + "references/GRCh38/fusion_report_db/mitelman.db", + "references/GRCh38/gencode_v46", + "references/GRCh38/gencode_v46/gencode", + "references/GRCh38/gencode_v46/gencode/Homo_sapiens.GRCh38.46.gtf", + "references/GRCh38/gencode_v46/gencode/Homo_sapiens.GRCh38.46.gtf.fasta", + "references/GRCh38/gencode_v46/gencode/Homo_sapiens.GRCh38.46.gtf.genepred", + "references/GRCh38/gencode_v46/gencode/Homo_sapiens.GRCh38.46.gtf.interval_list", + "references/GRCh38/gencode_v46/gencode/Homo_sapiens.GRCh38.46.gtf.refflat", + "references/GRCh38/gencode_v46/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.dict", + "references/GRCh38/gencode_v46/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa", + "references/GRCh38/gencode_v46/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa.fai", + "references/GRCh38/gencode_v46/salmon", + "references/GRCh38/gencode_v46/salmon/complete_ref_lens.bin", + "references/GRCh38/gencode_v46/salmon/ctable.bin", + "references/GRCh38/gencode_v46/salmon/ctg_offsets.bin", + "references/GRCh38/gencode_v46/salmon/duplicate_clusters.tsv", + "references/GRCh38/gencode_v46/salmon/info.json", + "references/GRCh38/gencode_v46/salmon/mphf.bin", + "references/GRCh38/gencode_v46/salmon/pos.bin", + "references/GRCh38/gencode_v46/salmon/pre_indexing.log", + "references/GRCh38/gencode_v46/salmon/rank.bin", + "references/GRCh38/gencode_v46/salmon/refAccumLengths.bin", + "references/GRCh38/gencode_v46/salmon/ref_indexing.log", + "references/GRCh38/gencode_v46/salmon/reflengths.bin", + "references/GRCh38/gencode_v46/salmon/refseq.bin", + "references/GRCh38/gencode_v46/salmon/seq.bin", + "references/GRCh38/gencode_v46/salmon/versionInfo.json", + "references/GRCh38/gencode_v46/star", + "references/GRCh38/gencode_v46/star/Genome", + "references/GRCh38/gencode_v46/star/Log.out", + "references/GRCh38/gencode_v46/star/SA", + "references/GRCh38/gencode_v46/star/SAindex", + 
"references/GRCh38/gencode_v46/star/chrLength.txt", + "references/GRCh38/gencode_v46/star/chrName.txt", + "references/GRCh38/gencode_v46/star/chrNameLength.txt", + "references/GRCh38/gencode_v46/star/chrStart.txt", + "references/GRCh38/gencode_v46/star/exonGeTrInfo.tab", + "references/GRCh38/gencode_v46/star/exonInfo.tab", + "references/GRCh38/gencode_v46/star/geneInfo.tab", + "references/GRCh38/gencode_v46/star/genomeParameters.txt", + "references/GRCh38/gencode_v46/star/sjdbInfo.txt", + "references/GRCh38/gencode_v46/star/sjdbList.fromGTF.out.tab", + "references/GRCh38/gencode_v46/star/sjdbList.out.tab", + "references/GRCh38/gencode_v46/star/transcriptInfo.tab", + "references/GRCh38/gencode_v46/starfusion", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/AnnotFilterRule.pm", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/_fusion_annot_lib.idx.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/_prot_info_db.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/annotfiltrule_cp.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/blast_pairs.idx.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/cp_gene_blast_pairs.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/cp_pfam_dat.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/cp_ref_annot_cdna.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/fusion_annot_lib.cp.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/index_pfam_hits.ok", + 
"references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/index_ref_annot_cdna.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/makeblastdb.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/mm2.splice_bed.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/mm2_genome_idx.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.gene_spans.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.mini.sortu.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/ref_genome.fa.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/ref_genome_fai.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.cp.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.index.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/__chkpts/validate_ctat_genome_lib.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/blast_pairs.dat.gz", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/blast_pairs.idx", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/cancer_splicing_lib", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/cancer_splicing_lib/cancer_splicing.idx", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/fusion_annot_lib.gz", + 
"references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/fusion_annot_lib.idx", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/pfam_domains.dbm", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/refGene.bed", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/refGene.sort.bed.gz", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/refGene.sort.bed.gz.tbi", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa.idx", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_annot.cds", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa.idx", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_annot.gtf", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.mini.sortu", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.mm2.splice.bed", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_annot.pep", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_annot.prot_info.dbm", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.fai", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.mm2", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.ndb", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nhr", + 
"references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nin", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.njs", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.not", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nsq", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.ntf", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nto", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Genome", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Log.out", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SA", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SAindex", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/build.ok", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrLength.txt", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrName.txt", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrNameLength.txt", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrStart.txt", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonGeTrInfo.tab", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonInfo.tab", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/geneInfo.tab", + 
"references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/genomeParameters.txt", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbInfo.txt", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.out.tab", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/transcriptInfo.tab", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm", + "references/GRCh38/gencode_v46/starfusion/ctat_genome_lib_build_dir/trans.blast.dat.gz", + "references/GRCh38/hgnc", + "references/GRCh38/hgnc/HGNC-DB-timestamp.txt", + "references/GRCh38/hgnc/hgnc_complete_set.txt", + "salmon", + "salmon/test", + "salmon/test_lib_format_counts.json", + "salmon/test_meta_info.json", + "samtools", + "samtools/test_1.fq.gz", + "samtools/test_2.fq.gz", + "samtools/test_other.fq.gz", + "samtools/test_singleton.fq.gz", + "star", + "star/test.Aligned.sortedByCoord.out.bam", + "star/test.Aligned.sortedByCoord.out.bam.bai", + "starfusion", + "starfusion/test.starfusion.abridged.coding_effect.tsv", + "starfusion/test.starfusion.abridged.tsv", + "starfusion/test.starfusion.fusion_predictions.tsv", + "stringtie", + "stringtie/[:]", + "stringtie/[:]/stringtie.merged.gtf", + "stringtie/test.ballgown", + "stringtie/test.coverage.gtf", + "stringtie/test.gene.abundance.txt", + "stringtie/test.transcripts.gtf" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.2" }, - "timestamp": "2025-09-10T15:37:20.367608" + "timestamp": "2025-12-17T10:47:25.635305314" } } \ No newline at end of file diff --git a/tests/yml/cram_only.yml b/tests/yml/cram_only.yml new 
file mode 100644 index 000000000..4944f9e4c --- /dev/null +++ b/tests/yml/cram_only.yml @@ -0,0 +1,6 @@ +- sample: test + cram: https://github.com/nf-core/test-datasets/raw/81cb45949e75cbb85cbf6c5ec9009ab45b160823/testdata/human/test.Aligned.sortedByCoord.out.cram + crai: https://github.com/nf-core/test-datasets/raw/81cb45949e75cbb85cbf6c5ec9009ab45b160823/testdata/human/test.Aligned.sortedByCoord.out.cram.crai + junctions: https://github.com/nf-core/test-datasets/raw/81cb45949e75cbb85cbf6c5ec9009ab45b160823/testdata/human/test.Chimeric.out.junction + splice_junctions: https://github.com/nf-core/test-datasets/raw/81cb45949e75cbb85cbf6c5ec9009ab45b160823/testdata/human/test.SJ.out.tab + strandedness: forward diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index d8568d990..a2287d78b 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -19,6 +19,7 @@ include { STAR_ALIGN } from '../modules/nf-core/star/align/ include { SALMON_QUANT } from '../modules/nf-core/salmon/quant/main' include { SAMTOOLS_CONVERT } from '../modules/nf-core/samtools/convert/main' include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index/main' +include { SAMTOOLS_COLLATEFASTQ } from '../modules/nf-core/samtools/collatefastq/main' include { paramsSummaryMap } from 'plugin/nf-schema' include { FASTQ_ALIGN_STAR } from '../subworkflows/nf-core/fastq_align_star' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' @@ -80,23 +81,37 @@ workflow RNAFUSION { return [ new_meta, fastqs, bam, bai, cram, crai, junctions, splice_junctions ] } .tap { ch_samplesheet } - .multiMap { meta, fastqs, bam, bai, cram, crai, junctions, splice_junctions -> - fastqs: [ meta, fastqs ] + .multiMap { meta, _fastqs, bam, bai, cram, crai, junctions, splice_junctions -> bam: [ meta, bam, bai ] cram: [ meta, cram, crai ] junctions: [ meta, junctions ] splice_junctions: [ meta, splice_junctions ] } - // Define which fastqs need to be processes (all analysis 
that's not aligning) + // Transform BAM/CRAM to fastq if needed def fastq_tools = ["salmon", "fusioninspector", "fusioncatcher"] selected_fastq_tools = tools.intersect(fastq_tools) - def ch_fastqs_to_process = ch_input.fastqs.branch { meta, fastqs -> - if (!fastqs && selected_fastq_tools) { - log.warn("Fastq files not found for sample '${meta.id}'. Skipping the following tools for this sample: ${selected_fastq_tools.join(', ')}") + def ch_fastq_branch = ch_samplesheet.branch { meta, fastqs, bam, _bai, cram, _crai, _junctions, _splice_junctions -> + if (!(fastqs || bam || cram) && selected_fastq_tools) { + log.warn("Fastq files not found or unable to create them for sample '${meta.id}'. Skipping the following tools for this sample: ${selected_fastq_tools.join(', ')}. Provide a FASTQ, BAM or CRAM file to run these tools.") } found: fastqs - not_found: !fastqs + return [ meta, fastqs ] + alignment: !fastqs && (bam || cram) + return [ meta, bam ?: cram ] + not_found: true + return [ meta, [] ] + } + + def ch_fastqs = ch_fastq_branch.found + if(selected_fastq_tools) { + SAMTOOLS_COLLATEFASTQ( + ch_fastq_branch.alignment, + BUILD_REFERENCES.out.fasta, + false + ) + ch_versions = ch_versions.mix(SAMTOOLS_COLLATEFASTQ.out.versions.first()) + ch_fastqs = ch_fastqs.mix(SAMTOOLS_COLLATEFASTQ.out.fastq) } // Convert CRAM to BAM when needed (when tools that don't support CRAM are used and when the sample isn't aligned) @@ -138,7 +153,7 @@ workflow RNAFUSION { def min_trimmed_reads = (params.min_trimmed_reads ?: 1) as Integer FASTQ_FASTQC_UMITOOLS_FASTP( - ch_fastqs_to_process.found, // reads: [ val(meta), [fastqs] ] + ch_fastqs, // reads: [ val(meta), [fastqs] ] params.skip_qc, // skip_fastqc with_umi, // with_umi skip_umi_extract, // skip_umi_extract @@ -301,7 +316,7 @@ workflow RNAFUSION { ) ch_versions = ch_versions.mix(FUSIONCATCHER_WORKFLOW.out.versions) // Add output of fusioncatcher to a channel + add empty entries for the samples that could not be run - 
ch_fusioncatcher_fusions = FUSIONCATCHER_WORKFLOW.out.fusions.mix(ch_fastqs_to_process.not_found) + ch_fusioncatcher_fusions = FUSIONCATCHER_WORKFLOW.out.fusions.mix(ch_fastq_branch.not_found) } //