Merge pull request #23 from Juke34/19

add output structure
Juke34 · Jan 25, 2025 · 6af36e4 · 6af36e4
2 parents ea5cc66 + 1d1193f
commit 6af36e4
Show file tree

Hide file tree

Showing 3 changed files with 67 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -21,6 +21,7 @@ AliNe is a pipeline written in Nextflow that aims to efficiently align reads aga
         * [Singularity](#singularity)  
    * [Usage and test](#usage)
    * [Parameters](#parameters)
+   * [Output](#output)
    * [Contributing](#contributing)
 
 ## Foreword
@@ -350,6 +351,63 @@ On success you should get a message looking like this:
         --sublong_options           additional options for sublong
 ```
 
+## Output
+
+Here is a description of the typical output you will get from AliNe:  
+
+```
+└── alignment_results                                         # Output folder set using --outdir. Default: <alignment_results>
+    │
+    ├── fastp                                                 # Folder - trimming with fastp (optional - if trimming activated by the user)
+    │   ├── sample1_fastp_report.html                         # fastp report for sample1
+    │   └── sample1_seqkit_trim.fastq.gz                      # sample1 trimmed fastq file
+    │
+    ├── seqkit_score                                          # Folder containing Sequencing scoring system detected with Seqkit
+    │   └── sample1.result.txt                                # Information about scoring system detected in sample1 (Phred+33, Phred+64 and Solexa), and change applied
+    │
+    ├── mean_read_length                                      # Folder with mean read length computed in bash (optional - done if selected aligners need the info and no value provided by the user)
+    │   └── sample1_seqkit_trim_sampled_read_length.txt       # Mean read length for sample1
+    │
+    ├── salmon_libtype                                        # Library information (read orientation and strand information) detected via Salmon
+    │   └── sample1_lib_format_counts.json                    # Library information detected for sample1
+    │
+    ├── alignment                                             # Folder gathering all alignment output (indices, sorted bam and logs)
+    │   ├── aligner1                                          # Folder gathering data produced by aligner 
+    │   │   ├── indicies                                      # Contains the genome index for the aligner
+    │   │   │   └── ...                                       #
+    │   │   ├── sample1_seqkit_trim_aligner1_sorted.log       # Contains the log of the aligner
+    │   │   └── sample1_seqkit_trim_aligner1_sorted.bam       # Sorted bam output
+    │   └── aligner2                                          # Folder gathering data produced by aligner 
+    │       ├── indicies                                      # Contains the genome index for the aligner
+    │       │   └── ...                                       # 
+    │       ├── sample1_seqkit_trim_aligner2_sorted.log       # Contains the log of the aligner
+    │       └── sample1_seqkit_trim_aligner2_sorted.bam       # Sorted bam output
+    │
+    ├── fastqc                                                # FastQC statistics folder
+    │   ├── raw                                               # Folder with FastQC result for raw data
+    │   │   └── fastqc_sample1_raw_logs                       # Folder with FastQC result for raw sample1 data
+    │   │       ├── sample1_fastqc.html                       # FastQC interactive file summarizing the results of the analysis, with graphs and interpretations.
+    │   │       └── sample1_fastqc.zip                        # Contains all the detailed data and graphics generated by FastQC
+    │   ├── trimming_fastp                                    # Folder with FastQC result for trimmed data (optional - if trimming activated by the user)
+    │   │   └── fastqc_sample1_trimmed_logs                   # FastQC output folder for trimmed sample1 data
+    │   │       ├── sample1_seqkit_trim_fastqc.html           # FastQC interactive file summarizing the results of the analysis, with graphs and interpretations.
+    │   │       └── sample1_seqkit_trim_fastqc.zip            # Contains all the detailed data and graphics generated by FastQC
+    │   ├── aligner1                                                 # FastQC output folder for data aligned with aligner1 
+    │   │   └── fastqc_sample1_aligner1_logs                         # FastQC output folder for sample1 data aligned with aligner1 
+    │   │       ├── sample1_seqkit_trim_aligner1_sorted_fastqc.html  # FastQC interactive file summarizing the results of the analysis, with graphs and interpretations.
+    │   │       └── sample1_seqkit_trim_aligner1_sorted_fastqc.zip   # Contains all the detailed data and graphics generated by FastQC
+    │   └── aligner2                                                 # FastQC output folder for data aligned with aligner2                      
+    │       └── fastqc_sample1_aligner2_logs                         # FastQC output folder for sample1 data aligned with aligner2  
+    │           ├── sample1_seqkit_trim_aligner2_sorted_fastqc.html  # FastQC interactive file summarizing the results of the analysis, with graphs and interpretations.
+    │           └── sample1_seqkit_trim_aligner2_sorted_fastqc.zip   # Contains all the detailed data and graphics generated by FastQC
+    │
+    └── MultiQC                                               # MultiQC folder that aggregates results across many samples into a single report
+        ├── multiqc_report.html                               # Report with interactive plots for statistics across many samples.
+        └── multiqc_report_data                               # Plots and data used by the multiqc_report.html
+
+```
+
+
 ## Contributing
 
 Contributions from the community are welcome ! See the [Contributing guidelines](https://github.com/Juke34/aline/blob/main/CONTRIBUTING.md)
diff --git a/aline.nf b/aline.nf
@@ -255,6 +255,10 @@ if ("ngmlr" in aligner_list ){
             //stop_pipeline = true
         //}
     }
+    if ( params.read_type == "short_paired"){
+        log.error "ngmlr aligner does not handle paired reads, please remove it from the list of aligner to use.\n"
+        stop_pipeline = true
+    }
 }       
 
 // novoalign tool - load license into the container
@@ -565,7 +569,7 @@ workflow align {
         if (params.library_type.contains("auto")){
             // ------------------- guess libtype -------------------
             salmon_index(genome.collect())
-            salmon_guess_lib(seqtk_sample.out.sampled, salmon_index.out.index, "salmon")
+            salmon_guess_lib(seqtk_sample.out.sampled, salmon_index.out.index, "salmon_libtype")
             salmon_guess_lib.out.tuple_id_libtype.set{tuple_id_lib}
         } else {
              set_tuple_withUserLib(raw_reads_trim)

diff --git a/modules/salmon.nf b/modules/salmon.nf
@@ -20,7 +20,7 @@ process salmon_index {
 
 process salmon_guess_lib {
     label 'salmon'
-    publishDir "${params.outdir}/${outpath}", pattern: "*", mode: 'copy'
+    publishDir "${params.outdir}/${outpath}", pattern: "*/*.json", mode: 'copy'
 
     input:
         tuple val(id), path(fastq)
@@ -29,7 +29,7 @@ process salmon_guess_lib {
 
     output:
         tuple val(id), env(LIBTYPE), emit: tuple_id_libtype
-
+        path "*/*lib_format_counts.json"
 
     script:
 
@@ -44,6 +44,8 @@ process salmon_guess_lib {
             salmon quant -i ${salmon_index} -l A ${input} --thread ${task.cpus} -o ${output} --minAssignedFrags 2 
             # extract the result
             LIBTYPE=\$(grep expected_format ${output}/lib_format_counts.json | awk '{print \$2}' | tr -d '",\n')
+            # change output name
+            mv ${output}/lib_format_counts.json ${output}/${id}_lib_format_counts.json
         """
 
 }