diff --git a/README.md b/README.md index a812641..4cd3243 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,6 @@ You can choose to run one or several aligner in parallel. | star 2pass mode | ✅ | ✅ | ⚠️ | ⚠️ | | subread | ✅ | ✅ | ⚠️ | ⚠️ | | sublong | ⚠️ | 🚫 | ✅ | ✅ | -| tophat | ✅ | ✅ | 🚫 | 🚫 | *Legend* ✅ Recommended @@ -90,7 +89,6 @@ It is then translated to the correct option in the following aligners: | star 2pass mode | 🚫 | 🚫 | 🚫 | | subread | -S fr / -S rf / -S ff | ISF ISR IU / OSF OSR OU / MSF MSR MU | read orientation | | sublong | 🚫 | 🚫 | 🚫 | -| tophat2 | fr-unstranded / fr-firststrand / fr-secondstrand | U / SR / SF | strand information | *Legend* U unstranded; SR stranded reverse; SF stranded forward; IU inward unstranded; OU outward unstranded; MU matching unstranded; ISF inward stranded forward; ISR inward stranded reverse; OSF outward stranded forward; OSR outward stranded reverse; MSF matching stranded forward; MSR matching stranded reverse ([see herefor morde details](https://salmon.readthedocs.io/en/latest/library_type.html)) @@ -124,7 +122,6 @@ If you provide an annotation file the pipeline will pass automatically the file | star 2pass mode | GTF / GFF (--sjdbGTFfile + --sjdbGTFtagExonParentTranscript Parent in case of GFF ) | | subread | GTF or compatible GFF format (-a) | | sublong | 🚫 | -| tophat | GTF/GFF3 (-G) | *Legend* 🚫 Not applicable @@ -255,7 +252,7 @@ nextflow run Juke34/AliNe \ --reads https://github.com/Juke34/AliNe/raw/refs/heads/main/test/illumina/yeast_R1.fastq.gz \ --genome https://raw.githubusercontent.com/Juke34/AliNe/refs/heads/main/test/yeast.fa \ --read_type short_single \ - --aligner bbmap,bowtie2,bwaaln,bwamem,bwasw,graphmap2,hisat2,minimap2,ngmlr,nucmer,star,subread,sublong,tophat2 \ + --aligner bbmap,bowtie2,bwaaln,bwamem,bwasw,graphmap2,hisat2,minimap2,ngmlr,nucmer,star,subread,sublong \ --trimming_fastp \ --star_options "--genomeSAindexNbases 9" ``` @@ -310,7 +307,7 @@ On success you should get a message looking like 
this: --reads path to the reads file or folder --reads_extension extension of the reads files (default: .fastq.gz) --genome path to the genome file - --aligner aligner(s) to use among this list (comma or space separated) [bbmap, bowtie, bowtie2, bwaaln, bwamem, bwasw, graphmap2, hisat2, kallisto, minimap2, novoalign, nucmer, ngmlr, star, subread, sublong, tophat2] + --aligner aligner(s) to use among this list (comma or space separated) [bbmap, bowtie, bowtie2, bwaaln, bwamem, bwasw, graphmap2, hisat2, kallisto, minimap2, novoalign, nucmer, ngmlr, star, subread, sublong] --outdir path to the output directory (default: alignment_results) - --annotation [Optional][used by STAR, Tophat2] Absolute path to the annotation file (gtf or gff3) + --annotation [Optional][used by STAR] Absolute path to the annotation file (gtf or gff3) @@ -346,7 +343,6 @@ On success you should get a message looking like this: --read_length [Optional][used by STAR] length of the reads, if none provided it is automatically deduced --subread_options additional options for subread --sublong_options additional options for sublong - --tophat2_options additional options for tophat ``` ## Contributing diff --git a/aline.nf b/aline.nf index f64f507..19909ca 100644 --- a/aline.nf +++ b/aline.nf @@ -29,7 +29,7 @@ libtype_allowed = [ 'U', 'IU', 'MU', 'OU', 'ISF', 'ISR', 'MSF', 'MSR', 'OSF', 'O params.library_type = "auto" params.skip_libray_usage = false // Avoid to use library type provided by library_type or auto params.read_length = "" // Use by star to set the sjdbOverhang parameter -// annotation is used by different aligner (tophat2, star, etc.). To avoid to duplicate processes according to the presence of the annotation file, a specific process is dedicated to create a fake file is none provided. +// annotation is used by different aligners (star, etc.). To avoid duplicating processes according to the presence of the annotation file, a specific process is dedicated to creating a fake file if none is provided.
// If process receive a file wich is not the fake one it includes the file in the command. To append the options of aligner we will use the annotation_file variable // While the processes will be called sending the "annotation" channel created by the prepare_annotation process. params.annotation = "" @@ -38,7 +38,7 @@ params.annotation = "" params.trimming_fastp = false // Aligner params -align_tools = [ 'bbmap', 'bowtie', 'bowtie2', 'bwaaln', 'bwamem', 'bwasw', 'graphmap2', 'hisat2', 'kallisto', 'minimap2', 'novoalign', 'nucmer', 'ngmlr', 'star', 'subread', 'sublong', 'tophat2' ] +align_tools = [ 'bbmap', 'bowtie', 'bowtie2', 'bwaaln', 'bwamem', 'bwasw', 'graphmap2', 'hisat2', 'kallisto', 'minimap2', 'novoalign', 'nucmer', 'ngmlr', 'star', 'subread', 'sublong' ] params.aligner = '' params.bbmap_options = '' params.bowtie_options = '' @@ -61,7 +61,6 @@ params.star_index_options = '' params.star_2pass = false params.subread_options = '-t 0'// -t specifes the type of input sequencing data. Possible values include 0, denoting RNA-seq data, or 1, denoting genomic DNA-seq data. params.sublong_options = '-X'// -X turn on the RNA-seq mode. -params.tophat2_options = '' // Report params params.fastqc = false @@ -313,20 +312,6 @@ if ( "sublong" in aligner_list ){ } } -// --- tophat2 tool --- -if ( "tophat2" in aligner_list ){ - log.warn ": Tophat2 has been deprecated. The developers recommend to switch to HISAT2. 
It is implemented here uniquely for comparison and reproducibily of ancient analyses.\n" - if (annotation_file && !params.tophat2_options.contains("-G ") ){ - params.replace("tophat2_options", "${params.tophat2_options} -G ${annotation_file}") - } - if (!params.relax){ - if ( params.read_type == "ont" || params.read_type == "pacbio"){ - log.error "Tophat2 aligner does not handle properly ont or pacbio data, please remove it from the list of aligner to use.\nOtherwise, if you know what you are doing you can activate the AliNe --relax parameter to use options that do not reflect expectation.\n" - stop_pipeline = true - } - } -} - if(stop_pipeline){ exit 1, "Please fix previous issues in order to run the pipeline.\n" } @@ -369,7 +354,7 @@ include {fastqc as fastqc_raw; fastqc as fastqc_fastp; fastqc as fastqc_ali_bbma fastqc as fastqc_ali_bwaaln; fastqc as fastqc_ali_bwamem; fastqc as fastqc_ali_bwasw; fastqc as fastqc_ali_graphmap2 ; fastqc as fastqc_ali_hisat2; fastqc as fastqc_ali_kallisto; fastqc as fastqc_ali_minimap2; fastqc as fastqc_ali_ngmlr; fastqc as fastqc_ali_novoalign ; fastqc as fastqc_ali_nucmer; fastqc as fastqc_ali_star; fastqc as fastqc_ali_subread ; - fastqc as fastqc_ali_sublong ; fastqc as fastqc_ali_tophat2} from "$baseDir/modules/fastqc.nf" + fastqc as fastqc_ali_sublong } from "$baseDir/modules/fastqc.nf" include {hisat2_index; hisat2} from "$baseDir/modules/hisat2.nf" include {kallisto_index; kallisto} from "$baseDir/modules/kallisto.nf" include {minimap2_index; minimap2} from "$baseDir/modules/minimap2.nf" @@ -385,12 +370,11 @@ include {samtools_sam2bam_nucmer; samtools_sam2bam as samtools_sam2bam_bowtie; s include {samtools_sort as samtools_sort_bbmap; samtools_sort as samtools_sort_bowtie; samtools_sort as samtools_sort_bowtie2; samtools_sort as samtools_sort_bwaaln; samtools_sort as samtools_sort_bwamem; samtools_sort as samtools_sort_bwasw; samtools_sort as samtools_sort_graphmap2; samtools_sort as samtools_sort_hisat2; samtools_sort as 
samtools_sort_minimap2; samtools_sort as samtools_sort_ngmlr; - samtools_sort as samtools_sort_novoalign; samtools_sort as samtools_sort_nucmer; samtools_sort as samtools_sort_tophat2; + samtools_sort as samtools_sort_novoalign; samtools_sort as samtools_sort_nucmer; samtools_sort as samtools_sort_sublong } from "$baseDir/modules/samtools.nf" include {seqtk_sample} from "$baseDir/modules/seqtk.nf" include {subread_index; subread; sublong_index; sublong} from "$baseDir/modules/subread.nf" include {prepare_star_index_options; star_index; star; star2pass} from "$baseDir/modules/star.nf" -include {tophat2_index; tophat2} from "$baseDir/modules/tophat.nf" //************************************************* // STEP 3 - CHECK 2 for parameters @@ -844,18 +828,6 @@ workflow align { } } - // --- TOPHAT2 --- - if ("tophat2" in aligner_list ){ - tophat2_index(genome.collect(), "alignment/tophat2/indicies") // index - tophat2(reads, genome.collect(), tophat2_index.out.collect(), annotation.collect(), "alignment/tophat2") // align - logs.concat(tophat2.out.tophat2_summary).set{logs} // save log - samtools_sort_tophat2(tophat2.out.tuple_sample_bam, "alignment/tophat2") - if(params.fastqc){ - fastqc_ali_tophat2(star_result, "fastqc/tophat2", "tophat2") - logs.concat(fastqc_ali_tophat2.out).set{logs} // save log - } - } - // ------------------- MULTIQC ----------------- multiqc(logs.collect(),params.multiqc_config) } @@ -949,7 +921,6 @@ def helpMSG() { --read_length [Optional][used by STAR] length of the reads, if none provided it is automatically deduced --subread_options additional options for subread --sublong_options additional options for sublong - --tophat2_options additional options for tophat """ } @@ -1060,11 +1031,6 @@ def printAlignerOptions(aligner_list, annotation_file, star_index_options) { subread parameters subread_options : ${params.subread_options} """} - if ("tophat2" in aligner_list){ - sentence += """ - tophat parameters - tophat2_options : 
${params.tophat2_options} - """} return sentence } diff --git a/config/multiqc_conf.yml b/config/multiqc_conf.yml index b949dcb..5371906 100644 --- a/config/multiqc_conf.yml +++ b/config/multiqc_conf.yml @@ -6,7 +6,6 @@ run_modules: - bowtie2 - hisat2 - star - - tophat - kallisto module_order: @@ -76,9 +75,4 @@ module_order: - fastqc: name: "FastQC (star)" path_filters: - - "*star_logs*" - - tophat - - fastqc: - name: "FastQC (tophat)" - path_filters: - - "*tophat2_logs*" + - "*star_logs*" \ No newline at end of file diff --git a/config/ressources/local.config b/config/ressources/local.config index 2102cd1..069724c 100644 --- a/config/ressources/local.config +++ b/config/ressources/local.config @@ -33,8 +33,4 @@ process { cpus = 2 time = '1h' } - withLabel: 'tophat2' { - cpus = 4 - time = '4h' - } } diff --git a/config/softwares.config b/config/softwares.config index d1d155a..d6921e7 100644 --- a/config/softwares.config +++ b/config/softwares.config @@ -62,7 +62,4 @@ process { withLabel: 'subread' { container = 'quay.io/biocontainers/subread:2.0.6--he4a0461_2' } - withLabel: 'tophat2' { - container = 'quay.io/biocontainers/tophat:2.1.1--py27_3' - } } \ No newline at end of file diff --git a/modules/tophat.nf b/modules/tophat.nf deleted file mode 100644 index 2dff338..0000000 --- a/modules/tophat.nf +++ /dev/null @@ -1,89 +0,0 @@ -/* ------------ TOPHAT2 ----------- - -*/ - -// It use bowtie in the background -process tophat2_index { - label 'tophat2' - tag "$genome_fasta" - publishDir "${params.outdir}/${outpath}", mode: 'copy' - - input: - path(genome_fasta) - val outpath - - output: - path('*.bt2') - - script: - - """ - bowtie2-build ${genome_fasta} ${genome_fasta.baseName} - """ -} - -process tophat2 { - label 'tophat2' - tag "$sample" - errorStrategy 'ignore' - publishDir "${params.outdir}/${outpath}/${filename}", path: "*", mode: 'copy', - saveAs: { file -> - if (!file.endsWith('.bam')) { - return file // Publish the file - } else { - return null // Exclude 
the file from being published - } - } - - - input: - tuple val(sample), path(reads), val(library), val(read_length) - path genome - path index_files - path annotation // optional -if provided the option is set in tophat2_options in main.nf - val outpath - - output: - tuple val(sample), path ("*accepted_hits.bam"), emit: tuple_sample_bam, optional:true - path "*align_summary.txt", emit: tophat2_summary - path ("*") - - script: - - // set input according to read_type parameter - def input = "${reads[0]}" - if (params.read_type == "short_paired"){ - input = "${reads[0]} ${reads[1]}" // if short reads check paired or not - } - - // catch filename - filename = reads[0].baseName.replace('.fastq','') - - // deal with library type - default is unstranded. - def lib_strand="" - if (! params.tophat2_options.contains("--library-type ") && - ! library.contains("U") && - ! params.skip_libray_usage){ // only if -S is not set and if we are not skipping library usage - if ( library.contains("SR") ) { - lib_strand = "--library-type=fr-firststrand" - } else if ( library.contains("SF") ) { - lib_strand = "--library-type=fr-secondstrand" - } - } - - - """ - tophat -p ${task.cpus} \\ - -o ${filename} \\ - ${lib_strand} ${params.tophat2_options} ${genome.baseName} \\ - ${input} - - # mv the output files - for i in ${filename}/*; do - file=\$(basename \${i}); - mv \${i} ${filename}_tophat2_\${file}; - done - """ - - -} \ No newline at end of file diff --git a/paper/paper.bib b/paper/paper.bib index 0fe5977..584601e 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -234,23 +234,6 @@ @article{subread url = {https://pubmed.ncbi.nlm.nih.gov/23558742/}, year = {2013}, } -@article{tophat2, - abstract = {TopHat is a popular spliced aligner for RNA-sequence (RNA-seq) experiments. In this paper, we describe TopHat2, which incorporates many significant enhancements to TopHat. 
TopHat2 can align reads of various lengths produced by the latest sequencing technologies, while allowing for variable-length indels with respect to the reference genome. In addition to de novo spliced alignment, TopHat2 can align reads across fusion breaks, which can occur after genomic translocations. TopHat2 combines the ability to identify novel splice sites with direct mapping to known transcripts, producing sensitive and accurate alignments, even for highly repetitive genomes or in the presence of pseudogenes. TopHat2 is available at http://ccb.jhu.edu/software/tophat. © 2013 Kim et al.; licensee BioMed Central Ltd.}, - author = {Daehwan Kim and Geo Pertea and Cole Trapnell and Harold Pimentel and Ryan Kelley and Steven L. Salzberg}, - doi = {10.1186/GB-2013-14-4-R36}, - issn = {1474760X}, - issue = {4}, - journal = {Genome Biology}, - keywords = {Animal Genetics and Genomics,Bioinformatics,Evolutionary Biology,Human Genetics,Microbial Genetics and Genomics,Plant Genetics and Genomics}, - month = {4}, - pages = {1-13}, - pmid = {23618408}, - publisher = {BioMed Central}, - title = {TopHat2: Accurate alignment of transcriptomes in the presence of insertions, deletions and gene fusions}, - volume = {14}, - url = {https://genomebiology.biomedcentral.com/articles/10.1186/gb-2013-14-4-r36}, - year = {2013}, -} @misc{bwamem, title={Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM}, author={Heng Li}, diff --git a/paper/paper.md b/paper/paper.md index f302e1c..0538302 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -20,7 +20,7 @@ bibliography: paper.bib Alignment of sequencing reads against a reference genome is a fundamental step in many bioinformatics workflows. Aligner performance varies by speed, memory efficiency, and accuracy, with some tailored to specific sequencing technologies and others more versatile, which makes the selection of an appropriate tool context-dependent. 
To streamline this process, we present AliNe (Alignment in Nextflow), a flexible and efficient read alignment pipeline built on the Nextflow framework [@nextflow]. AliNe contains a broad range of commonly used aligners, and is designed to accommodate any high-throughput sequencing projects. -AliNe supports short reads (both paired-end and single-end) as well as long reads generated by PacBio and Oxford Nanopore Technologies (ONT). It currently supports 17 widely used alignment tools, including BBMap [@bbmap], Bowtie [@bowtie], Bowtie2 [@bowtie2], BWA [@bwaaln], BWA-MEM [@bwamem], BWA-SW [@bwasw], GraphMap2 [@graphmap2], HISAT2 [@hisat2], Kallisto [@kallisto], Minimap2 [@minimap2], ngmlr [@ngmlr], novoAlign [@novoalign], nucmer [@nucmer], STAR (single or two-pass mode) [@star], subread [@subread], sublong [@subread] and Tophat2 [@tophat2]. These aligners are integrated into a single, easy-to-use workflow, providing a unified entry point for any project requiring alignment and giving users the flexibility to choose the best tool for their specific data and objectives. AliNe is designed to minimize user inputs and avoid common parameter mistakes ( e.g. scoring system, strandedness, orientation). +AliNe supports short reads (both paired-end and single-end) as well as long reads generated by PacBio and Oxford Nanopore Technologies (ONT). It currently supports 16 widely used alignment tools, including BBMap [@bbmap], Bowtie [@bowtie], Bowtie2 [@bowtie2], BWA [@bwaaln], BWA-MEM [@bwamem], BWA-SW [@bwasw], GraphMap2 [@graphmap2], HISAT2 [@hisat2], Kallisto [@kallisto], Minimap2 [@minimap2], ngmlr [@ngmlr], novoAlign [@novoalign], nucmer [@nucmer], STAR (single or two-pass mode) [@star], subread [@subread] and sublong [@subread]. These aligners are integrated into a single, easy-to-use workflow, providing a unified entry point for any project requiring alignment and giving users the flexibility to choose the best tool for their specific data and objectives. 
AliNe is designed to minimize user inputs and avoid common parameter mistakes (e.g., scoring system, strandedness, orientation). # Statement of Need diff --git a/profiles/test_illumina_paired.config b/profiles/test_illumina_paired.config index 1f3fb41..a447859 100644 --- a/profiles/test_illumina_paired.config +++ b/profiles/test_illumina_paired.config @@ -7,7 +7,7 @@ params { reads = "$baseDir/test/illumina/" genome = "$baseDir/test/yeast.fa" - aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwasw,graphmap2,hisat2,minimap2,nucmer,star,subread,tophat2' + aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwasw,graphmap2,hisat2,minimap2,nucmer,star,subread' star_options = "--genomeSAindexNbases 9" // the default 14 is too large for the genome size=1351857 multiqc_config = "$baseDir/config/multiqc_conf.yml" } \ No newline at end of file diff --git a/profiles/test_illumina_single.config b/profiles/test_illumina_single.config index 532aa7e..9cb4ca1 100644 --- a/profiles/test_illumina_single.config +++ b/profiles/test_illumina_single.config @@ -8,7 +8,7 @@ params { reads = "$baseDir/test/illumina/" genome = "$baseDir/test/yeast.fa" params.read_type = "short_single" - aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwasw,graphmap2,hisat2,kallisto,minimap2,ngmlr,nucmer,star,subread,sublong,tophat2' + aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwasw,graphmap2,hisat2,kallisto,minimap2,ngmlr,nucmer,star,subread,sublong' trimming_fastp = true star_options = "--genomeSAindexNbases 9" // the default 14 is too large for the genome size=1351857 multiqc_config = "$baseDir/config/multiqc_conf.yml"