forked from Plant-Food-Research-Open/genepal
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.nf
executable file
·178 lines (157 loc) · 5.46 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#!/usr/bin/env nextflow
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
plant-food-research-open/genepal
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
GitHub : https://github.com/plant-food-research-open/genepal
----------------------------------------------------------------------------------------
*/
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
include { GENEPAL } from './workflows/genepal'
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_genepal_pipeline'
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_genepal_pipeline'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
PROCESS: Filter Genome Assembly by Minimum Contig Length
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
process SEQKIT_GET_LENGTH {
    // Removes contigs shorter than `params.min_contig_length` from a genome
    // assembly and lists the contigs that remain.
    //
    // Input : [ meta, genome_fasta ]
    // Output: filtered_fasta -> [ meta, filtered_<meta.id>.fasta, <meta.id>_contig_list.txt ]
    tag "${meta.id}"
    label 'process_medium'

    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://depot.galaxyproject.org/singularity/seqkit:2.4.0--h9ee0642_0'
        : 'quay.io/biocontainers/seqkit:2.4.0--h9ee0642_0'}"

    input:
    // The assembly is staged under input/ so that its name can never equal the
    // output name. Without this, an input that is already called
    // filtered_<meta.id>.fasta (e.g. the output of a previous run of this
    // process) would be truncated by the shell redirection below before seqkit
    // had a chance to read it.
    tuple val(meta), path(genome_fasta, stageAs: 'input/*')

    output:
    tuple val(meta), path("filtered_${meta.id}.fasta"), path("${meta.id}_contig_list.txt"), emit: filtered_fasta

    script:
    """
    # Filter contigs based on length and output filtered FASTA
    seqkit seq --min-len ${params.min_contig_length} ${genome_fasta} > filtered_${meta.id}.fasta

    # Generate a list of filtered contigs (first column of the seqkit table)
    seqkit fx2tab --length --name filtered_${meta.id}.fasta | awk '{print \$1}' > ${meta.id}_contig_list.txt
    """
}
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
NAMED WORKFLOWS FOR PIPELINE
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
//
// WORKFLOW: Run main analysis pipeline depending on type of input
//
workflow PLANTFOODRESEARCHOPEN_GENEPAL {

    take:
    ch_target_assembly
    ch_tar_assm_str
    ch_is_masked
    ch_te_library
    ch_braker_annotation
    ch_braker_ex_asm_str
    ch_benchmark_gff
    ch_rna_sra
    ch_rna_fq
    ch_rna_bam_by_assembly
    ch_sortmerna_fastas
    ch_ext_prot_fastas
    ch_liftoff_fasta
    ch_liftoff_gff
    ch_tsebra_config
    ch_orthofinder_pep

    main:
    //
    // PROCESS: Drop contigs below the minimum length from each target assembly
    //
    SEQKIT_GET_LENGTH(ch_target_assembly)

    // The process emits [ meta, fasta, contig_list ]; only the first two
    // elements are forwarded to GENEPAL.
    ch_filtered_assembly = SEQKIT_GET_LENGTH.out.filtered_fasta
        .map { meta, filtered_fa, _contig_ids -> [ meta, filtered_fa ] }

    //
    // WORKFLOW: Run GENEPAL on the length-filtered assemblies; every other
    // channel is passed through unchanged
    //
    GENEPAL(
        ch_filtered_assembly,
        ch_tar_assm_str,
        ch_is_masked,
        ch_te_library,
        ch_braker_annotation,
        ch_braker_ex_asm_str,
        ch_benchmark_gff,
        ch_rna_sra,
        ch_rna_fq,
        ch_rna_bam_by_assembly,
        ch_sortmerna_fastas,
        ch_ext_prot_fastas,
        ch_liftoff_fasta,
        ch_liftoff_gff,
        ch_tsebra_config,
        ch_orthofinder_pep
    )

    emit:
    multiqc_report = GENEPAL.out.multiqc_report // channel: /path/to/multiqc_report.html
}
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RUN MAIN WORKFLOW
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
workflow {

    main:
    //
    // SUBWORKFLOW: Run initialization tasks
    //
    PIPELINE_INITIALISATION(
        params.version,
        params.monochrome_logs,
        args,
        params.outdir,
        params.input,
        params.rna_evidence,
        params.liftoff_annotations,
        params.orthofinder_annotations
    )

    //
    // WORKFLOW: Run main workflow
    //
    // The unfiltered target assembly is passed straight through:
    // PLANTFOODRESEARCHOPEN_GENEPAL runs SEQKIT_GET_LENGTH on its first input
    // itself. Filtering here as well would send a three-element tuple
    // [ meta, fasta, contig_list ] into a process that declares a two-element
    // tuple input, and would filter an already-filtered file whose name equals
    // the name of the file the process writes.
    //
    PLANTFOODRESEARCHOPEN_GENEPAL(
        PIPELINE_INITIALISATION.out.target_assembly,
        PIPELINE_INITIALISATION.out.tar_assm_str,
        PIPELINE_INITIALISATION.out.is_masked,
        PIPELINE_INITIALISATION.out.te_library,
        PIPELINE_INITIALISATION.out.braker_annotation,
        PIPELINE_INITIALISATION.out.braker_ex_asm_str,
        PIPELINE_INITIALISATION.out.benchmark_gff,
        PIPELINE_INITIALISATION.out.rna_sra,
        PIPELINE_INITIALISATION.out.rna_fq,
        PIPELINE_INITIALISATION.out.rna_bam_by_assembly,
        PIPELINE_INITIALISATION.out.sortmerna_fastas,
        PIPELINE_INITIALISATION.out.ext_prot_fastas,
        PIPELINE_INITIALISATION.out.liftoff_fasta,
        PIPELINE_INITIALISATION.out.liftoff_gff,
        PIPELINE_INITIALISATION.out.tsebra_config,
        PIPELINE_INITIALISATION.out.orthofinder_pep
    )

    //
    // SUBWORKFLOW: Run completion tasks
    //
    PIPELINE_COMPLETION(
        params.email,
        params.email_on_fail,
        params.plaintext_email,
        params.outdir,
        params.monochrome_logs,
        params.hook_url,
        PLANTFOODRESEARCHOPEN_GENEPAL.out.multiqc_report
    )
}
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
THE END
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/