|
2 | 2 | Run AMP screening tools |
3 | 3 | */ |
4 | 4 |
|
5 | | -include { MACREL_CONTIGS } from '../../modules/nf-core/macrel/contigs/main' |
6 | | -include { HMMER_HMMSEARCH as AMP_HMMER_HMMSEARCH } from '../../modules/nf-core/hmmer/hmmsearch/main' |
7 | | -include { AMPLIFY_PREDICT } from '../../modules/nf-core/amplify/predict/main' |
8 | | -include { AMPIR } from '../../modules/nf-core/ampir/main' |
9 | | -include { AMP_DATABASE_DOWNLOAD } from '../../modules/local/amp_database_download' |
10 | | -include { AMPCOMBI2_PARSETABLES } from '../../modules/nf-core/ampcombi2/parsetables' |
11 | | -include { AMPCOMBI2_COMPLETE } from '../../modules/nf-core/ampcombi2/complete' |
12 | | -include { AMPCOMBI2_CLUSTER } from '../../modules/nf-core/ampcombi2/cluster' |
13 | | -include { GUNZIP as GUNZIP_MACREL_PRED ; GUNZIP as GUNZIP_MACREL_ORFS } from '../../modules/nf-core/gunzip/main' |
14 | | -include { GUNZIP as AMP_GUNZIP_HMMER_HMMSEARCH } from '../../modules/nf-core/gunzip/main' |
15 | | -include { TABIX_BGZIP as AMP_TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main' |
16 | | -include { MERGE_TAXONOMY_AMPCOMBI } from '../../modules/local/merge_taxonomy_ampcombi' |
| 5 | +include { MACREL_CONTIGS } from '../../modules/nf-core/macrel/contigs' |
| 6 | +include { HMMER_HMMSEARCH as AMP_HMMER_HMMSEARCH } from '../../modules/nf-core/hmmer/hmmsearch' |
| 7 | +include { AMPLIFY_PREDICT } from '../../modules/nf-core/amplify/predict' |
| 8 | +include { AMPIR } from '../../modules/nf-core/ampir' |
| 9 | +include { AMP_DATABASE_DOWNLOAD } from '../../modules/local/amp_database_download' |
| 10 | +include { AMPCOMBI2_PARSETABLES } from '../../modules/nf-core/ampcombi2/parsetables' |
| 11 | +include { AMPCOMBI2_COMPLETE } from '../../modules/nf-core/ampcombi2/complete' |
| 12 | +include { AMPCOMBI2_CLUSTER } from '../../modules/nf-core/ampcombi2/cluster' |
| 13 | +include { GUNZIP as GUNZIP_MACREL_PRED ; GUNZIP as GUNZIP_MACREL_ORFS } from '../../modules/nf-core/gunzip' |
| 14 | +include { GUNZIP as AMP_GUNZIP_HMMER_HMMSEARCH } from '../../modules/nf-core/gunzip' |
| 15 | +include { TABIX_BGZIP as AMP_TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip' |
| 16 | +include { MERGE_TAXONOMY_AMPCOMBI } from '../../modules/local/merge_taxonomy_ampcombi' |
17 | 17 |
|
18 | 18 | workflow AMP { |
19 | 19 | take: |
20 | | - fastas // tuple val(meta), path(contigs) |
21 | | - faas // tuple val(meta), path(PROKKA/PRODIGAL.out.faa) |
22 | | - tsvs // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv) |
23 | | - gbks // tuple val(meta), path(ANNOTATION_ANNOTATION_TOOL.out.gbk) |
24 | | - tsvs_interpro // tuple val(meta), path(INTERPROSCAN.out.tsv)' |
| 20 | + fastas // tuple val(meta), path(contigs) |
| 21 | + faas // tuple val(meta), path(PROKKA/PRODIGAL.out.faa) |
| 22 | + tsvs // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv) |
| 23 | + gbks // tuple val(meta), path(ANNOTATION_ANNOTATION_TOOL.out.gbk) |
| 24 | + tsvs_interpro // tuple val(meta), path(INTERPROSCAN.out.tsv) |
25 | 25 |
|
26 | 26 | main: |
27 | | - ch_versions = Channel.empty() |
28 | | - ch_ampresults_for_ampcombi = Channel.empty() |
29 | | - ch_macrel_faa = Channel.empty() |
30 | | - ch_ampcombi_summaries = Channel.empty() |
31 | | - ch_ampcombi_complete = null |
| 27 | + ch_versions = channel.empty() |
| 28 | + ch_ampresults_for_ampcombi = channel.empty() |
| 29 | + ch_macrel_faa = channel.empty() |
| 30 | + ch_ampcombi_summaries = channel.empty() |
| 31 | + ch_ampcombi_complete = null |
32 | 32 |
|
33 | 33 | // When adding a new tool that requires FAA, make sure to update conditions |
34 | 34 | // in funcscan.nf around annotation and AMP subworkflow execution |
35 | 35 | // to ensure annotation is executed! |
36 | | - ch_faa_for_amplify = faas |
37 | | - ch_faa_for_amp_hmmsearch = faas |
38 | | - ch_faa_for_ampir = faas |
39 | | - ch_faa_for_ampcombi = faas |
40 | | - ch_gbk_for_ampcombi = gbks |
41 | | - ch_interpro_for_ampcombi = tsvs_interpro |
| 36 | + ch_faa_for_amplify = faas |
| 37 | + ch_faa_for_amp_hmmsearch = faas |
| 38 | + ch_faa_for_ampir = faas |
| 39 | + ch_faa_for_ampcombi = faas |
| 40 | + ch_gbk_for_ampcombi = gbks |
| 41 | + ch_interpro_for_ampcombi = tsvs_interpro |
42 | 42 |
|
43 | 43 | // AMPLIFY |
44 | | - if ( !params.amp_skip_amplify ) { |
45 | | - AMPLIFY_PREDICT ( ch_faa_for_amplify, [] ) |
46 | | - ch_versions = ch_versions.mix( AMPLIFY_PREDICT.out.versions ) |
47 | | - ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix( AMPLIFY_PREDICT.out.tsv ) |
| 44 | + if (!params.amp_skip_amplify) { |
| 45 | + AMPLIFY_PREDICT(ch_faa_for_amplify, []) |
| 46 | + ch_versions = ch_versions.mix(AMPLIFY_PREDICT.out.versions) |
| 47 | + ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix(AMPLIFY_PREDICT.out.tsv) |
48 | 48 | } |
49 | 49 |
|
50 | 50 | // MACREL |
51 | | - if ( !params.amp_skip_macrel ) { |
52 | | - MACREL_CONTIGS ( fastas ) |
53 | | - ch_versions = ch_versions.mix( MACREL_CONTIGS.out.versions ) |
54 | | - GUNZIP_MACREL_PRED ( MACREL_CONTIGS.out.amp_prediction ) |
55 | | - GUNZIP_MACREL_ORFS ( MACREL_CONTIGS.out.all_orfs ) |
56 | | - ch_versions = ch_versions.mix( GUNZIP_MACREL_PRED.out.versions ) |
57 | | - ch_versions = ch_versions.mix( GUNZIP_MACREL_ORFS.out.versions ) |
58 | | - ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix( GUNZIP_MACREL_PRED.out.gunzip ) |
59 | | - ch_macrel_faa = ch_macrel_faa.mix( GUNZIP_MACREL_ORFS.out.gunzip ) |
60 | | - ch_faa_for_ampcombi = ch_faa_for_ampcombi.mix( ch_macrel_faa ) |
| 51 | + if (!params.amp_skip_macrel) { |
| 52 | + MACREL_CONTIGS(fastas) |
| 53 | + ch_versions = ch_versions.mix(MACREL_CONTIGS.out.versions) |
| 54 | + GUNZIP_MACREL_PRED(MACREL_CONTIGS.out.amp_prediction) |
| 55 | + GUNZIP_MACREL_ORFS(MACREL_CONTIGS.out.all_orfs) |
| 56 | + ch_versions = ch_versions.mix(GUNZIP_MACREL_PRED.out.versions) |
| 57 | + ch_versions = ch_versions.mix(GUNZIP_MACREL_ORFS.out.versions) |
| 58 | + ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix(GUNZIP_MACREL_PRED.out.gunzip) |
| 59 | + ch_macrel_faa = ch_macrel_faa.mix(GUNZIP_MACREL_ORFS.out.gunzip) |
| 60 | + ch_faa_for_ampcombi = ch_faa_for_ampcombi.mix(ch_macrel_faa) |
61 | 61 | } |
62 | 62 |
|
63 | 63 | // AMPIR |
64 | | - if ( !params.amp_skip_ampir ) { |
65 | | - AMPIR ( ch_faa_for_ampir, params.amp_ampir_model, params.amp_ampir_minlength, 0.0 ) |
66 | | - ch_versions = ch_versions.mix( AMPIR.out.versions ) |
67 | | - ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix( AMPIR.out.amps_tsv ) |
| 64 | + if (!params.amp_skip_ampir) { |
| 65 | + AMPIR(ch_faa_for_ampir, params.amp_ampir_model, params.amp_ampir_minlength, 0.0) |
| 66 | + ch_versions = ch_versions.mix(AMPIR.out.versions) |
| 67 | + ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix(AMPIR.out.amps_tsv) |
68 | 68 | } |
69 | 69 |
|
70 | 70 | // HMMSEARCH |
71 | | - if ( params.amp_run_hmmsearch ) { |
72 | | - if ( params.amp_hmmsearch_models ) { ch_amp_hmm_models = Channel.fromPath( params.amp_hmmsearch_models, checkIfExists: true ) } else { error('[nf-core/funcscan] error: HMM model files not found for --amp_hmmsearch_models! Please check input.') } |
73 | | - |
74 | | - ch_amp_hmm_models_meta = ch_amp_hmm_models |
75 | | - .map { |
76 | | - file -> |
77 | | - def meta = [:] |
78 | | - meta['id'] = file.extension == 'gz' ? file.name - '.hmm.gz' : file.name - '.hmm' |
79 | | - [ meta, file ] |
80 | | - } |
| 71 | + if (params.amp_run_hmmsearch) { |
| 72 | + if (params.amp_hmmsearch_models) { |
| 73 | + ch_amp_hmm_models = channel.fromPath(params.amp_hmmsearch_models, checkIfExists: true) |
| 74 | + } |
| 75 | + else { |
| 76 | + error('[nf-core/funcscan] error: HMM model files not found for --amp_hmmsearch_models! Please check input.') |
| 77 | + } |
| 78 | + |
| 79 | + ch_amp_hmm_models_meta = ch_amp_hmm_models.map { file -> |
| 80 | + def meta = [:] |
| 81 | + meta['id'] = file.extension == 'gz' ? file.name - '.hmm.gz' : file.name - '.hmm' |
| 82 | + [meta, file] |
| 83 | + } |
81 | 84 |
|
82 | 85 | ch_in_for_amp_hmmsearch = ch_faa_for_amp_hmmsearch |
83 | | - .combine( ch_amp_hmm_models_meta ) |
84 | | - .map { |
85 | | - meta_faa, faa, meta_hmm, hmm -> |
86 | | - def meta_new = [:] |
87 | | - meta_new['id'] = meta_faa['id'] |
88 | | - meta_new['hmm_id'] = meta_hmm['id'] |
89 | | - [ meta_new, hmm, faa, params.amp_hmmsearch_savealignments, params.amp_hmmsearch_savetargets, params.amp_hmmsearch_savedomains ] |
90 | | - } |
91 | | - |
92 | | - AMP_HMMER_HMMSEARCH ( ch_in_for_amp_hmmsearch ) |
93 | | - ch_versions = ch_versions.mix( AMP_HMMER_HMMSEARCH.out.versions ) |
94 | | - AMP_GUNZIP_HMMER_HMMSEARCH ( AMP_HMMER_HMMSEARCH.out.output ) |
95 | | - ch_versions = ch_versions.mix( AMP_GUNZIP_HMMER_HMMSEARCH.out.versions ) |
96 | | - ch_AMP_GUNZIP_HMMER_HMMSEARCH = AMP_GUNZIP_HMMER_HMMSEARCH.out.gunzip |
97 | | - .map { meta, file -> |
98 | | - [ [id: meta.id], file ] |
| 86 | + .combine(ch_amp_hmm_models_meta) |
| 87 | + .map { meta_faa, faa, meta_hmm, hmm -> |
| 88 | + def meta_new = [:] |
| 89 | + meta_new['id'] = meta_faa['id'] |
| 90 | + meta_new['hmm_id'] = meta_hmm['id'] |
| 91 | + [meta_new, hmm, faa, params.amp_hmmsearch_savealignments, params.amp_hmmsearch_savetargets, params.amp_hmmsearch_savedomains] |
99 | 92 | } |
100 | | - ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix( ch_AMP_GUNZIP_HMMER_HMMSEARCH ) |
| 93 | + |
| 94 | + AMP_HMMER_HMMSEARCH(ch_in_for_amp_hmmsearch) |
| 95 | + ch_versions = ch_versions.mix(AMP_HMMER_HMMSEARCH.out.versions) |
| 96 | + AMP_GUNZIP_HMMER_HMMSEARCH(AMP_HMMER_HMMSEARCH.out.output) |
| 97 | + ch_versions = ch_versions.mix(AMP_GUNZIP_HMMER_HMMSEARCH.out.versions) |
| 98 | + ch_AMP_GUNZIP_HMMER_HMMSEARCH = AMP_GUNZIP_HMMER_HMMSEARCH.out.gunzip.map { meta, file -> |
| 99 | + [[id: meta.id], file] |
| 100 | + } |
| 101 | + ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix(ch_AMP_GUNZIP_HMMER_HMMSEARCH) |
101 | 102 | } |
102 | 103 |
|
103 | 104 | // AMPCOMBI2 |
104 | 105 | ch_input_for_ampcombi = ch_ampresults_for_ampcombi |
105 | 106 | .groupTuple() |
106 | | - .join( ch_faa_for_ampcombi ) |
107 | | - .join( ch_gbk_for_ampcombi ) |
108 | | - .join( ch_interpro_for_ampcombi ) |
109 | | - .multiMap{ |
110 | | - input: [ it[0], it[1] ] |
| 107 | + .join(ch_faa_for_ampcombi) |
| 108 | + .join(ch_gbk_for_ampcombi) |
| 109 | + .join(ch_interpro_for_ampcombi) |
| 110 | + .multiMap { |
| 111 | + input: [it[0], it[1]] |
111 | 112 | faa: it[2] |
112 | 113 | gbk: it[3] |
113 | | - interpro: it [4] |
| 114 | + interpro: it[4] |
114 | 115 | } |
115 | 116 |
|
116 | 117 | // AMPCOMBI2::PARSETABLES |
117 | | - if ( params.amp_ampcombi_db != null ) { |
118 | | - AMPCOMBI2_PARSETABLES ( ch_input_for_ampcombi.input, ch_input_for_ampcombi.faa, ch_input_for_ampcombi.gbk, params.amp_ampcombi_db_id, params.amp_ampcombi_db, ch_input_for_ampcombi.interpro ) |
119 | | - } else { |
120 | | - AMP_DATABASE_DOWNLOAD( params.amp_ampcombi_db_id ) |
121 | | - ch_versions = ch_versions.mix( AMP_DATABASE_DOWNLOAD.out.versions ) |
| 118 | + if (params.amp_ampcombi_db != null) { |
| 119 | + AMPCOMBI2_PARSETABLES(ch_input_for_ampcombi.input, ch_input_for_ampcombi.faa, ch_input_for_ampcombi.gbk, params.amp_ampcombi_db_id, params.amp_ampcombi_db, ch_input_for_ampcombi.interpro) |
| 120 | + } |
| 121 | + else { |
| 122 | + AMP_DATABASE_DOWNLOAD(params.amp_ampcombi_db_id) |
| 123 | + ch_versions = ch_versions.mix(AMP_DATABASE_DOWNLOAD.out.versions) |
122 | 124 | ch_ampcombi_input_db = AMP_DATABASE_DOWNLOAD.out.db |
123 | | - AMPCOMBI2_PARSETABLES ( ch_input_for_ampcombi.input, ch_input_for_ampcombi.faa, ch_input_for_ampcombi.gbk, params.amp_ampcombi_db_id, ch_ampcombi_input_db, ch_input_for_ampcombi.interpro ) |
| 125 | + AMPCOMBI2_PARSETABLES(ch_input_for_ampcombi.input, ch_input_for_ampcombi.faa, ch_input_for_ampcombi.gbk, params.amp_ampcombi_db_id, ch_ampcombi_input_db, ch_input_for_ampcombi.interpro) |
124 | 126 | } |
125 | | - ch_versions = ch_versions.mix( AMPCOMBI2_PARSETABLES.out.versions ) |
| 127 | + ch_versions = ch_versions.mix(AMPCOMBI2_PARSETABLES.out.versions) |
126 | 128 |
|
127 | | - ch_ampcombi_summaries = AMPCOMBI2_PARSETABLES.out.tsv.map{ it[1] }.collect() |
| 129 | + ch_ampcombi_summaries = AMPCOMBI2_PARSETABLES.out.tsv.map { it[1] }.collect() |
128 | 130 |
|
129 | 131 | // AMPCOMBI2::COMPLETE |
130 | 132 | ch_summary_count = ch_ampcombi_summaries.map { it.size() }.sum() |
131 | 133 |
|
132 | | - if ( ch_summary_count == 0 || ch_summary_count == 1 ) { |
| 134 | + if (ch_summary_count == 0 || ch_summary_count == 1) { |
133 | 135 | log.warn("[nf-core/funcscan] AMPCOMBI2: ${ch_summary_count} file(s) passed. Skipping AMPCOMBI2_COMPLETE, AMPCOMBI2_CLUSTER, and TAXONOMY MERGING steps.") |
134 | | - } else { |
| 136 | + } |
| 137 | + else { |
135 | 138 | AMPCOMBI2_COMPLETE(ch_ampcombi_summaries) |
136 | | - ch_versions = ch_versions.mix( AMPCOMBI2_COMPLETE.out.versions ) |
137 | | - ch_ampcombi_complete = AMPCOMBI2_COMPLETE.out.tsv |
138 | | - .filter { file -> file.countLines() > 1 } |
| 139 | + ch_versions = ch_versions.mix(AMPCOMBI2_COMPLETE.out.versions) |
| 140 | + ch_ampcombi_complete = AMPCOMBI2_COMPLETE.out.tsv.filter { file -> file.countLines() > 1 } |
139 | 141 | } |
140 | 142 |
|
141 | 143 | // AMPCOMBI2::CLUSTER |
142 | | - if ( ch_ampcombi_complete != null ) { |
143 | | - AMPCOMBI2_CLUSTER ( ch_ampcombi_complete ) |
144 | | - ch_versions = ch_versions.mix( AMPCOMBI2_CLUSTER.out.versions ) |
145 | | - } else { |
| 144 | + if (ch_ampcombi_complete != null) { |
| 145 | + AMPCOMBI2_CLUSTER(ch_ampcombi_complete) |
| 146 | + ch_versions = ch_versions.mix(AMPCOMBI2_CLUSTER.out.versions) |
| 147 | + } |
| 148 | + else { |
146 | 149 | log.warn("[nf-core/funcscan] No AMP hits were found in the samples and so no clustering will be applied.") |
147 | 150 | } |
148 | 151 |
|
149 | 152 | // MERGE_TAXONOMY |
150 | | - if ( params.run_taxa_classification && ch_ampcombi_complete == null ) { |
| 153 | + if (params.run_taxa_classification && ch_ampcombi_complete == null) { |
151 | 154 | log.warn("[nf-core/funcscan] No AMP hits were found in the samples, therefore no Taxonomy will be merged ") |
152 | | - } else if ( params.run_taxa_classification && ch_ampcombi_complete != null ) { |
153 | | - ch_mmseqs_taxonomy_list = tsvs.map{ it[1] }.collect() |
| 155 | + } |
| 156 | + else if (params.run_taxa_classification && ch_ampcombi_complete != null) { |
| 157 | + ch_mmseqs_taxonomy_list = tsvs.map { it[1] }.collect() |
154 | 158 |
|
155 | | - MERGE_TAXONOMY_AMPCOMBI( AMPCOMBI2_CLUSTER.out.cluster_tsv, ch_mmseqs_taxonomy_list ) |
156 | | - ch_versions = ch_versions.mix( MERGE_TAXONOMY_AMPCOMBI.out.versions ) |
| 159 | + MERGE_TAXONOMY_AMPCOMBI(AMPCOMBI2_CLUSTER.out.cluster_tsv, ch_mmseqs_taxonomy_list) |
| 160 | + ch_versions = ch_versions.mix(MERGE_TAXONOMY_AMPCOMBI.out.versions) |
157 | 161 |
|
158 | | - ch_tabix_input = Channel.of( [ 'id':'ampcombi_complete_summary_taxonomy' ] ) |
159 | | - .combine( MERGE_TAXONOMY_AMPCOMBI.out.tsv ) |
| 162 | + ch_tabix_input = channel.of(['id': 'ampcombi_complete_summary_taxonomy']) |
| 163 | + .combine(MERGE_TAXONOMY_AMPCOMBI.out.tsv) |
160 | 164 |
|
161 | | - AMP_TABIX_BGZIP( ch_tabix_input ) |
162 | | - ch_versions = ch_versions.mix( AMP_TABIX_BGZIP.out.versions ) |
| 165 | + AMP_TABIX_BGZIP(ch_tabix_input) |
| 166 | + ch_versions = ch_versions.mix(AMP_TABIX_BGZIP.out.versions) |
163 | 167 | } |
164 | 168 |
|
165 | 169 | emit: |
|
0 commit comments