Merge pull request #15 from Juke34/remove_tophat2

get rid of tophat2 fix #5
Juke34 · Jan 17, 2025 · 80c709c · 80c709c
2 parents 5cab552 + 5571f8d
commit 80c709c
Show file tree

Hide file tree

Showing 10 changed files with 10 additions and 167 deletions.
diff --git a/README.md b/README.md
@@ -55,7 +55,6 @@ You can choose to run one or several aligner in parallel.
 | star 2pass mode | ✅ | ✅ | ⚠️ | ⚠️ |
 | subread | ✅ | ✅ | ⚠️ | ⚠️ |
 | sublong | ⚠️ | 🚫 | ✅ | ✅ |
-| tophat | ✅ | ✅ | 🚫 | 🚫 |
 
 *Legend*  
 ✅ Recommended  
@@ -90,7 +89,6 @@ It is then translated to the correct option in the following aligners:
 | star 2pass mode | 🚫 | 🚫 | 🚫 |
 | subread | -S fr / -S rf / -S ff | ISF ISR IU / OSF OSR OU / MSF MSR MU | read orientation |
 | sublong | 🚫 | 🚫 | 🚫 |
-| tophat2 | fr-unstranded / fr-firststrand / fr-secondstrand | U / SR / SF | strand information |
 
 *Legend*  
 U unstranded; SR stranded reverse; SF stranded forward; IU inward unstranded; OU outward unstranded; MU matching unstranded; ISF inward stranded forward; ISR inward stranded reverse; OSF outward stranded forward; OSR outward stranded reverse; MSF matching stranded forward; MSR matching stranded reverse ([see here for more details](https://salmon.readthedocs.io/en/latest/library_type.html))  
@@ -124,7 +122,6 @@ If you provide an annotation file the pipeline will pass automatically the file
 | star 2pass mode | GTF / GFF (--sjdbGTFfile + --sjdbGTFtagExonParentTranscript Parent in case of GFF ) |
 | subread | GTF or compatible GFF format (-a) |
 | sublong | 🚫 |
-| tophat | GTF/GFF3 (-G) | 
 
  *Legend*  
 🚫 Not applicable  
@@ -255,7 +252,7 @@ nextflow run Juke34/AliNe \
   --reads https://github.com/Juke34/AliNe/raw/refs/heads/main/test/illumina/yeast_R1.fastq.gz \
   --genome https://raw.githubusercontent.com/Juke34/AliNe/refs/heads/main/test/yeast.fa \
   --read_type short_single \
-  --aligner bbmap,bowtie2,bwaaln,bwamem,bwasw,graphmap2,hisat2,minimap2,ngmlr,nucmer,star,subread,sublong,tophat2 \
+  --aligner bbmap,bowtie2,bwaaln,bwamem,bwasw,graphmap2,hisat2,minimap2,ngmlr,nucmer,star,subread,sublong \
   --trimming_fastp \
   --star_options "--genomeSAindexNbases 9"
 ```
@@ -310,7 +307,7 @@ On success you should get a message looking like this:
         --reads                     path to the reads file or folder
         --reads_extension           extension of the reads files (default: .fastq.gz)
         --genome                    path to the genome file
-        --aligner                   aligner(s) to use among this list (comma or space separated) [bbmap, bowtie, bowtie2, bwaaln, bwamem, bwasw, graphmap2, hisat2, kallisto, minimap2, novoalign, nucmer, ngmlr, star, subread, sublong, tophat2]
+        --aligner                   aligner(s) to use among this list (comma or space separated) [bbmap, bowtie, bowtie2, bwaaln, bwamem, bwasw, graphmap2, hisat2, kallisto, minimap2, novoalign, nucmer, ngmlr, star, subread, sublong]
         --outdir                    path to the output directory (default: alignment_results)
         --annotation                [Optional][used by STAR] Absolute path to the annotation file (gtf or gff3)
 
@@ -346,7 +343,6 @@ On success you should get a message looking like this:
         --read_length               [Optional][used by STAR] length of the reads, if none provided it is automatically deduced
         --subread_options           additional options for subread
         --sublong_options           additional options for sublong
-        --tophat2_options            additional options for tophat
 ```
 
 ## Contributing

diff --git a/aline.nf b/aline.nf
@@ -29,7 +29,7 @@ libtype_allowed = [ 'U', 'IU', 'MU', 'OU', 'ISF', 'ISR', 'MSF', 'MSR', 'OSF', 'O
 params.library_type = "auto" 
 params.skip_libray_usage = false // Avoid to use library type provided by library_type or auto
 params.read_length = "" // Use by star to set the sjdbOverhang parameter
-// annotation is used by different aligner (tophat2, star, etc.). To avoid to duplicate processes according to the presence of the annotation file, a specific process is dedicated to create a fake file is none provided. 
+// annotation is used by different aligners (star, etc.). To avoid duplicating processes according to the presence of the annotation file, a specific process is dedicated to creating a fake file if none is provided. 
 // If a process receives a file which is not the fake one, it includes the file in the command. To append the options of the aligner we will use the annotation_file variable
 // While the processes will be called sending the "annotation" channel created by the prepare_annotation process.
 params.annotation = ""
@@ -38,7 +38,7 @@ params.annotation = ""
 params.trimming_fastp = false
 
 // Aligner params
-align_tools = [ 'bbmap', 'bowtie', 'bowtie2', 'bwaaln', 'bwamem', 'bwasw', 'graphmap2', 'hisat2', 'kallisto', 'minimap2', 'novoalign', 'nucmer', 'ngmlr', 'star', 'subread', 'sublong', 'tophat2' ]
+align_tools = [ 'bbmap', 'bowtie', 'bowtie2', 'bwaaln', 'bwamem', 'bwasw', 'graphmap2', 'hisat2', 'kallisto', 'minimap2', 'novoalign', 'nucmer', 'ngmlr', 'star', 'subread', 'sublong' ]
 params.aligner = ''
 params.bbmap_options = ''
 params.bowtie_options = ''
@@ -61,7 +61,6 @@ params.star_index_options = ''
 params.star_2pass = false
 params.subread_options = '-t 0'// -t specifes the type of input sequencing data. Possible values include 0, denoting RNA-seq data, or 1, denoting genomic DNA-seq data.
 params.sublong_options = '-X'// -X turn on the RNA-seq mode.
-params.tophat2_options = ''
 
 // Report params
 params.fastqc = false
@@ -313,20 +312,6 @@ if ( "sublong" in aligner_list ){
     }
 }
 
-// --- tophat2 tool ---
-if ( "tophat2" in aligner_list ){
-    log.warn ": Tophat2 has been deprecated. The developers recommend to switch to HISAT2. It is implemented here uniquely for comparison and reproducibily of ancient analyses.\n"
-    if (annotation_file && !params.tophat2_options.contains("-G ") ){
-         params.replace("tophat2_options", "${params.tophat2_options} -G ${annotation_file}")
-    }
-    if (!params.relax){
-        if ( params.read_type == "ont" ||  params.read_type == "pacbio"){
-            log.error "Tophat2 aligner does not handle properly ont or pacbio data, please remove it from the list of aligner to use.\nOtherwise, if you know what you are doing you can activate the AliNe --relax parameter to use options that do not reflect expectation.\n"
-            stop_pipeline = true
-        }
-    }
-}
-
 if(stop_pipeline){
     exit 1, "Please fix previous issues in order to run the pipeline.\n"
 }
@@ -369,7 +354,7 @@ include {fastqc as fastqc_raw; fastqc as fastqc_fastp; fastqc as fastqc_ali_bbma
          fastqc as fastqc_ali_bwaaln; fastqc as fastqc_ali_bwamem; fastqc as fastqc_ali_bwasw; fastqc as fastqc_ali_graphmap2 ; 
          fastqc as fastqc_ali_hisat2; fastqc as fastqc_ali_kallisto; fastqc as fastqc_ali_minimap2; fastqc as fastqc_ali_ngmlr; 
          fastqc as fastqc_ali_novoalign ; fastqc as fastqc_ali_nucmer; fastqc as fastqc_ali_star; fastqc as fastqc_ali_subread ; 
-         fastqc as fastqc_ali_sublong ; fastqc as fastqc_ali_tophat2} from "$baseDir/modules/fastqc.nf"
+         fastqc as fastqc_ali_sublong } from "$baseDir/modules/fastqc.nf"
 include {hisat2_index; hisat2} from "$baseDir/modules/hisat2.nf"
 include {kallisto_index; kallisto} from "$baseDir/modules/kallisto.nf" 
 include {minimap2_index; minimap2} from "$baseDir/modules/minimap2.nf" 
@@ -385,12 +370,11 @@ include {samtools_sam2bam_nucmer; samtools_sam2bam as samtools_sam2bam_bowtie; s
 include {samtools_sort as samtools_sort_bbmap; samtools_sort as samtools_sort_bowtie; samtools_sort as samtools_sort_bowtie2; samtools_sort as samtools_sort_bwaaln; 
          samtools_sort as samtools_sort_bwamem; samtools_sort as samtools_sort_bwasw; samtools_sort as samtools_sort_graphmap2; 
          samtools_sort as samtools_sort_hisat2; samtools_sort as samtools_sort_minimap2; samtools_sort as samtools_sort_ngmlr; 
-         samtools_sort as samtools_sort_novoalign;  samtools_sort as samtools_sort_nucmer; samtools_sort as samtools_sort_tophat2;
+         samtools_sort as samtools_sort_novoalign;  samtools_sort as samtools_sort_nucmer;
          samtools_sort as samtools_sort_sublong } from "$baseDir/modules/samtools.nf"
 include {seqtk_sample} from "$baseDir/modules/seqtk.nf" 
 include {subread_index; subread; sublong_index; sublong} from "$baseDir/modules/subread.nf"
 include {prepare_star_index_options; star_index; star; star2pass} from "$baseDir/modules/star.nf"
-include {tophat2_index; tophat2} from "$baseDir/modules/tophat.nf" 
 
 //*************************************************
 // STEP 3 - CHECK 2 for parameters
@@ -844,18 +828,6 @@ workflow align {
             }
         }
 
-        // --- TOPHAT2 ---
-        if ("tophat2" in aligner_list ){
-            tophat2_index(genome.collect(), "alignment/tophat2/indicies") // index
-            tophat2(reads, genome.collect(), tophat2_index.out.collect(), annotation.collect(), "alignment/tophat2") // align
-            logs.concat(tophat2.out.tophat2_summary).set{logs} // save log
-            samtools_sort_tophat2(tophat2.out.tuple_sample_bam, "alignment/tophat2")
-            if(params.fastqc){
-                fastqc_ali_tophat2(star_result, "fastqc/tophat2", "tophat2")
-                logs.concat(fastqc_ali_tophat2.out).set{logs} // save log
-            }
-        }
-
         // ------------------- MULTIQC -----------------
         multiqc(logs.collect(),params.multiqc_config)
 }
@@ -949,7 +921,6 @@ def helpMSG() {
         --read_length               [Optional][used by STAR] length of the reads, if none provided it is automatically deduced
         --subread_options           additional options for subread
         --sublong_options           additional options for sublong
-        --tophat2_options            additional options for tophat
 
     """
 }
@@ -1060,11 +1031,6 @@ def printAlignerOptions(aligner_list, annotation_file, star_index_options) {
     subread parameters
         subread_options            : ${params.subread_options}
     """}
-    if ("tophat2" in aligner_list){
-        sentence += """
-    tophat parameters
-        tophat2_options            : ${params.tophat2_options}
-    """}
 
     return sentence
 }

diff --git a/config/multiqc_conf.yml b/config/multiqc_conf.yml
@@ -6,7 +6,6 @@ run_modules:
     - bowtie2
     - hisat2
     - star
-    - tophat
     - kallisto
 
 module_order:
@@ -76,9 +75,4 @@ module_order:
     - fastqc:
         name: "FastQC (star)"
         path_filters:
-          - "*star_logs*"
-    - tophat
-    - fastqc:
-        name: "FastQC (tophat)"
-        path_filters:
-          - "*tophat2_logs*"
+          - "*star_logs*"
diff --git a/config/ressources/local.config b/config/ressources/local.config
@@ -33,8 +33,4 @@ process {
         cpus = 2
         time = '1h'
     }
-    withLabel: 'tophat2' {
-        cpus = 4
-        time = '4h'
-    }
 }
diff --git a/config/softwares.config b/config/softwares.config
@@ -62,7 +62,4 @@ process {
     withLabel: 'subread' {
         container = 'quay.io/biocontainers/subread:2.0.6--he4a0461_2'
     }
-    withLabel: 'tophat2' {
-        container = 'quay.io/biocontainers/tophat:2.1.1--py27_3'
-    }
 }
diff --git a/modules/tophat.nf b/modules/tophat.nf
diff --git a/paper/paper.bib b/paper/paper.bib
@@ -234,23 +234,6 @@ @article{subread
    url = {https://pubmed.ncbi.nlm.nih.gov/23558742/},
    year = {2013},
 }
-@article{tophat2,
-   abstract = {TopHat is a popular spliced aligner for RNA-sequence (RNA-seq) experiments. In this paper, we describe TopHat2, which incorporates many significant enhancements to TopHat. TopHat2 can align reads of various lengths produced by the latest sequencing technologies, while allowing for variable-length indels with respect to the reference genome. In addition to de novo spliced alignment, TopHat2 can align reads across fusion breaks, which can occur after genomic translocations. TopHat2 combines the ability to identify novel splice sites with direct mapping to known transcripts, producing sensitive and accurate alignments, even for highly repetitive genomes or in the presence of pseudogenes. TopHat2 is available at http://ccb.jhu.edu/software/tophat. © 2013 Kim et al.; licensee BioMed Central Ltd.},
-   author = {Daehwan Kim and Geo Pertea and Cole Trapnell and Harold Pimentel and Ryan Kelley and Steven L. Salzberg},
-   doi = {10.1186/GB-2013-14-4-R36},
-   issn = {1474760X},
-   issue = {4},
-   journal = {Genome Biology},
-   keywords = {Animal Genetics and Genomics,Bioinformatics,Evolutionary Biology,Human Genetics,Microbial Genetics and Genomics,Plant Genetics and Genomics},
-   month = {4},
-   pages = {1-13},
-   pmid = {23618408},
-   publisher = {BioMed Central},
-   title = {TopHat2: Accurate alignment of transcriptomes in the presence of insertions, deletions and gene fusions},
-   volume = {14},
-   url = {https://genomebiology.biomedcentral.com/articles/10.1186/gb-2013-14-4-r36},
-   year = {2013},
-}
 @misc{bwamem,
       title={Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM}, 
       author={Heng Li},

diff --git a/paper/paper.md b/paper/paper.md
@@ -20,7 +20,7 @@ bibliography: paper.bib
 
 Alignment of sequencing reads against a reference genome is a fundamental step in many bioinformatics workflows. Aligner performance varies by speed, memory efficiency, and accuracy, with some tailored to specific sequencing technologies and others more versatile, which makes the selection of an appropriate tool context-dependent. To streamline this process, we present AliNe (Alignment in Nextflow), a flexible and efficient read alignment pipeline built on the Nextflow framework [@nextflow]. AliNe contains a broad range of commonly used aligners, and is designed to accommodate any high-throughput sequencing project.
 
-AliNe supports short reads (both paired-end and single-end) as well as long reads generated by PacBio and Oxford Nanopore Technologies (ONT). It currently supports 17 widely used alignment tools, including BBMap [@bbmap], Bowtie [@bowtie], Bowtie2 [@bowtie2], BWA [@bwaaln], BWA-MEM [@bwamem], BWA-SW [@bwasw], GraphMap2 [@graphmap2], HISAT2 [@hisat2], Kallisto [@kallisto], Minimap2 [@minimap2], ngmlr [@ngmlr], novoAlign [@novoalign], nucmer [@nucmer], STAR (single or two-pass mode) [@star], subread [@subread], sublong [@subread] and Tophat2 [@tophat2]. These aligners are integrated into a single, easy-to-use workflow, providing a unified entry point for any project requiring alignment and giving users the flexibility to choose the best tool for their specific data and objectives. AliNe is designed to minimize user inputs and avoid common parameter mistakes ( e.g. scoring system, strandedness, orientation).
+AliNe supports short reads (both paired-end and single-end) as well as long reads generated by PacBio and Oxford Nanopore Technologies (ONT). It currently supports 16 widely used alignment tools, including BBMap [@bbmap], Bowtie [@bowtie], Bowtie2 [@bowtie2], BWA [@bwaaln], BWA-MEM [@bwamem], BWA-SW [@bwasw], GraphMap2 [@graphmap2], HISAT2 [@hisat2], Kallisto [@kallisto], Minimap2 [@minimap2], ngmlr [@ngmlr], novoAlign [@novoalign], nucmer [@nucmer], STAR (single or two-pass mode) [@star], subread [@subread] and sublong [@subread]. These aligners are integrated into a single, easy-to-use workflow, providing a unified entry point for any project requiring alignment and giving users the flexibility to choose the best tool for their specific data and objectives. AliNe is designed to minimize user inputs and avoid common parameter mistakes (e.g., scoring system, strandedness, orientation).
 
 # Statement of Need
 

diff --git a/profiles/test_illumina_paired.config b/profiles/test_illumina_paired.config
@@ -7,7 +7,7 @@
 params {
     reads = "$baseDir/test/illumina/"
     genome = "$baseDir/test/yeast.fa"
-    aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwasw,graphmap2,hisat2,minimap2,nucmer,star,subread,tophat2'
+    aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwasw,graphmap2,hisat2,minimap2,nucmer,star,subread'
     star_options = "--genomeSAindexNbases 9" // the default 14 is too large for the genome size=1351857
     multiqc_config = "$baseDir/config/multiqc_conf.yml"
 }
diff --git a/profiles/test_illumina_single.config b/profiles/test_illumina_single.config
@@ -8,7 +8,7 @@ params {
     reads = "$baseDir/test/illumina/"
     genome = "$baseDir/test/yeast.fa"
     params.read_type = "short_single"
-    aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwasw,graphmap2,hisat2,kallisto,minimap2,ngmlr,nucmer,star,subread,sublong,tophat2'
+    aligner = 'bbmap,bowtie,bowtie2,bwaaln,bwamem,bwasw,graphmap2,hisat2,kallisto,minimap2,ngmlr,nucmer,star,subread,sublong'
     trimming_fastp = true
     star_options = "--genomeSAindexNbases 9" // the default 14 is too large for the genome size=1351857
     multiqc_config = "$baseDir/config/multiqc_conf.yml"