From 226057c5cf5157c2d82b4c501d333b40c123d6cb Mon Sep 17 00:00:00 2001 From: alex Date: Thu, 14 Oct 2021 14:51:50 -0500 Subject: [PATCH 01/47] added treecluster to yml --- vampirus_env.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/vampirus_env.yml b/vampirus_env.yml index cb10503..868a0ef 100644 --- a/vampirus_env.yml +++ b/vampirus_env.yml @@ -38,3 +38,4 @@ dependencies: - r-biocmanager=1.30.10 - pip: - oligotyping + - treecluster From 1eaffcf3a5af4dfd558d26bee61b2b7dee2c797e Mon Sep 17 00:00:00 2001 From: aveglia Date: Thu, 14 Oct 2021 14:57:52 -0500 Subject: [PATCH 02/47] altered the config file --- vampirus.config | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/vampirus.config b/vampirus.config index aaeac84..dd7ac59 100644 --- a/vampirus.config +++ b/vampirus.config @@ -58,12 +58,24 @@ params { // Minimum length of overlap for sequence merging to occur for a pair minoverlap="10" - // ASV generation and clustering parameters + // ASV generation parameters // Alpha value for denoising - the higher the alpha the higher the chance of false positives in ASV generation (1 or 2) alpha="1" // Minimum size or representation in dataset for sequence to be considered in ASV generation (ex. If set to 4, any unique sequence that is not seen in the data more 3 times is removed) minSize="8" + + // ASV filtering parameters - You can set the filtering to run with the command --filter + + // Path to database containing sequences that if ASVs match, are then removed prior to any analyses + filtDB="" + // Path to database containing sequences that if ASVs match to, are kept for final ASV file to be used in susequent analyses + keepDB="" + // Keep any sequences without hits - for yes, set keepnohit to ="true" + keepnohit="true" + + // ASV clustering parameters + // Percent similarity to cluster nucleotide ASV sequences (used when --ncASV is set) clusterNuclID="85" // List of percent similarities to cluster nucleotide ASV sequences - must be separated by a comma (ex. "95,96") @@ -75,14 +87,11 @@ params { // Minimum length of amino acid translation to be considered during protein clustered ASV (pcASV) generation. Recommended to put this at the expected amino acid sequence length based on your maximum read length (e.g. 
if maxLen="420", then minAA should be 420/3 so 140) minAA="140" - // ASV filtering parameters - You can set the filtering to run with the command --filter + // Tree-based ASV/AminoType clustering parameters + + // + - // Path to database containing sequences that if ASVs match, are then removed prior to any analyses - filtDB="" - // Path to database containing sequences that if ASVs match to, are kept for final ASV file to be used in susequent analyses - keepDB="" - // Keep any sequences without hits - for yes, set keepnohit to ="true" - keepnohit="true" //Parameters for diamond command for filtering From 4bbff63e3206d1c037ba77d985bf0e0c810c136f Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Fri, 15 Oct 2021 16:26:43 -0500 Subject: [PATCH 03/47] added more quality trimming parameters to config --- vAMPirus.nf | 9 +++++---- vampirus.config | 13 +++++++++++-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/vAMPirus.nf b/vAMPirus.nf index 78d02bb..7b66f3e 100644 --- a/vAMPirus.nf +++ b/vAMPirus.nf @@ -678,7 +678,7 @@ if (params.DataCheck || params.Analyze) { echo ${sample_id} fastp -i ${reads[0]} -I ${reads[1]} -o left-${sample_id}.filter.fq -O right-${sample_id}.filter.fq --detect_adapter_for_pe \ - --average_qual 25 -c --overrepresentation_analysis --html ${sample_id}.fastp.html --json ${sample_id}.fastp.json --thread ${task.cpus} \ + --average_qual ${params.avQ} -n ${params.nM} -c --overrepresentation_analysis --html ${sample_id}.fastp.html --json ${sample_id}.fastp.json --thread ${task.cpus} \ --report_title ${sample_id} bash get_readstats.sh ${sample_id}.fastp.json @@ -721,7 +721,7 @@ if (params.DataCheck || params.Analyze) { RTRIM=\$( echo ${GlobTrim} | cut -f 2 -d "," ) bbduk.sh in=${reads[0]} out=${sample_id}_bb_R1.fastq.gz ftl=\${FTRIM} t=${task.cpus} bbduk.sh in=${reads[1]} out=${sample_id}_bb_R2.fastq.gz ftl=\${RTRIM} t=${task.cpus} - repair.sh in1=${sample_id}_bb_R1.fastq.gz in2=${sample_id}_bb_R2.fastq.gz out1=${sample_id}_bbduk_R1.fastq.gz out2=${sample_id}_bbduk_R2.fastq.gz outs=sing.fq repair + repair.sh in1=${sample_id}_bb_R1.fastq.gz in2=${sample_id}_bb_R2.fastq.gz out1=${sample_id}_bbduk_R1.fastq.gz out2=${sample_id}_bbduk_R2.fastq.gz outs=sing.fq repair """ } else if ( params.multi && params.primers ) { """ @@ -881,11 +881,12 @@ if (params.DataCheck || params.Analyze) { cat \$x | tr "\t" "," > \${pre}.csv rm \$x done - reformat.sh in=${reads} out=${params.projtag}_preFilt_preclean.fasta t=${task.cpus} + bbduk.sh in=${reads} out=${params.projtag}_qtrimmed.fastq t=${task.cpu} qtrim=rl trimq=${params.trimq} + reformat.sh in=${params.projtag}_qtrimmed.fastq out=${params.projtag}_preFilt_preclean.fasta t=${task.cpus} echo "sample,reads" >> reads_per_sample_preFilt_preClean.csv grep ">" ${params.projtag}_preFilt_preclean.fasta | awk -F ">" '{print \$2}' | awk -F "." 
'{print \$1}' | sort --parallel=${task.cpus} | uniq -c | sort -brg --parallel=${task.cpus} | awk '{print \$2","\$1}' >> reads_per_sample_preFilt_preClean.csv rm ${params.projtag}_preFilt_preclean.fasta - fastp -i ${reads} -o ${params.projtag}_merged_preFilt_clean.fastq -b ${params.maxLen} -l ${params.minLen} --thread ${task.cpus} -n 1 + fastp -i ${reads} -o ${params.projtag}_merged_preFilt_clean.fastq -b ${params.maxLen} -l ${params.minLen} --thread ${task.cpus} -n ${params.maxn} reformat.sh in=${params.projtag}_merged_preFilt_clean.fastq out=${params.projtag}_merged_preFilt_clean.fasta t=${task.cpus} bbduk.sh in=${params.projtag}_merged_preFilt_clean.fastq out=${params.projtag}_merged_clean_Lengthfiltered.fastq minlength=${params.maxLen} maxlength=${params.maxLen} t=${task.cpus} bbduk.sh in=${params.projtag}_merged_clean_Lengthfiltered.fastq bhist=${params.projtag}_all_merged_postFilt_baseFrequency_hist.txt qhist=${params.projtag}_all_merged_postFilt_qualityScore_hist.txt gchist=${params.projtag}_all_merged_postFilt_gcContent_hist.txt aqhist=${params.projtag}_all_merged_postFilt_averageQuaulity_hist.txt lhist=${params.projtag}_all_merged_postFilt_length_hist.txt gcbins=auto diff --git a/vampirus.config b/vampirus.config index dd7ac59..e1341a3 100644 --- a/vampirus.config +++ b/vampirus.config @@ -23,6 +23,15 @@ params { // Name of directory created to store output of vAMPirus analyses (Nextflow will create this directory in the working directory) outdir="results" + // Quality filter/trimming options + + // Average read quality - forward or reverse reads will be discarded if average base quality across the read is below the number set below (25 is a good start) + avQ="25" + // Maximum number of "N"s acceptable in the forward or reverse reads (default for fastp is 5) + mN="5" + // Minmum base quality to be trimmed + trimq="15" + // Primer Removal parameters // If not specifying primer sequences, forward and reverse reads will be trimmed by number of bases specified using "--GlobTrim #basesfromforward,#basesfromreverse" @@ -53,8 +62,8 @@ params { maxEE="3" // Maximum number of non-matching nucleotides allowed in overlap region diffs="10" - // Maximum number of "N"'s in a sequence - if above the specified value, sequence will be discarded - maxn="20" + // Maximum number of "N"'s in a sequence - if above the specified value, sequence will be discarded (should be similar to what is set for "mN" above in fastp parameters) + maxn="10" // Minimum length of overlap for sequence merging to occur for a pair minoverlap="10" From 28725c191bf0e9109e5d6936e71fd3fa759e772a Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Sun, 17 Oct 2021 11:28:26 -0500 Subject: [PATCH 04/47] added TreeClutser options and processes --- README.md | 6 ++++- vAMPirus.nf | 71 +++++++++++++++++++++++++++++++++++++++++++++++-- vampirus.config | 45 +++++++++++++++++-------------- 3 files changed, 99 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 11ba23e..b27400a 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,9 @@ 6. (EXPERIMENTAL) Added Minimum Entropy Decomposition analysis using the oligotyping program produced by the Meren Lab. This allows for sequence clustering based on sequence positions of interest (biologically meaningful) or top positions with the highest Shannon's Entropy (read more here: https://merenlab.org/software/oligotyping/ ; and below). -7. Color nodes on phylogenetic trees based on Taxonomy or Minimum Entropy Decomposition results +7. 
Phylogeny-based clustering ASV or AminoType sequences with TreeCluster (https://github.com/niemasd/TreeCluster; https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0221068) + +8. Color nodes on phylogenetic trees based on Taxonomy or Minimum Entropy Decomposition results 8. PCoA plots added to Analyze report if NMDS does not converge. @@ -305,3 +307,5 @@ If you do use vAMPirus for your analyses, please cite the following -> 15. UNOISE algorithm - R.C. Edgar (2016). UNOISE2: improved error-correction for Illumina 16S and ITS amplicon sequencing, https://doi.org/10.1101/081257 16. Oligotyping - A. Murat Eren, Gary G. Borisy, Susan M. Huse, Jessica L. Mark Welch (2014). Oligotyping analysis of the human oral microbiome. Proceedings of the National Academy of Sciences Jul 2014, 111 (28) E2875-E2884; DOI: 10.1073/pnas.1409644111 + +17. Balaban M, Moshiri N, Mai U, Jia X, Mirarab S (2019). "TreeCluster: Clustering biological sequences using phylogenetic trees." PLoS ONE. 14(8):e0221068. doi:10.1371/journal.pone.0221068 diff --git a/vAMPirus.nf b/vAMPirus.nf index 7b66f3e..4331c0d 100644 --- a/vAMPirus.nf +++ b/vAMPirus.nf @@ -2308,7 +2308,7 @@ if (params.DataCheck || params.Analyze) { output: tuple file("*_aln.fasta"), file("*_aln.html"), file("*.tree"), file("*.log"), file("*iq*"), file("*mt*") into align_results_asv - file("*iq.treefile") into (nucl_phyl_plot_asv, asvphy_med) + file("*iq.treefile") into (nucl_phyl_plot_asv, asvphy_med, asv_treeclust) script: """ @@ -2339,6 +2339,39 @@ if (params.DataCheck || params.Analyze) { } } + if (params.asvTClust){ + + process ASV_PhyloClustering { + + label 'norm_cpus' + + publishDir "${params.workingdir}/${params.outdir}/Analyze/Analyses/ASVs/ + + input: + file(tree) from asv_treeclust + + output: + + + script: + """ + TreeCluster.py -i ${tree} ${params.asvTCopp} > ${params.projtag}_ASV_treeclustering.out + #create headless treeclustering.out + tail -n +2 ${params.projtag}_ASV_treeclustering.out | sed 's/-1/0/g' > headless.treeout + #summarizing clustering results + awk -F "\t" '{print $2}' | sort | uniq > group.list + g=1 + echo "SequenceID,PhyloGroup" > ${params.projtag}_phylogroup.csv + for x in \$(cat group.list) + do grep "$x" headless.out > tmp.tsv + groupID="phyloGroup\${g}" + awk -F "\t" '{print \$1",'\${groupID}'"}' tmp.tsv >> ${params.projtag}_phylogroup.csv + g=\$((\$g+1)) + done + rm tmp.csv + } + + } if (params.asvMED) { process ASV_Minimum_Entropy_Decomposition { @@ -2937,7 +2970,7 @@ if (params.DataCheck || params.Analyze) { output: tuple file("*_aln.fasta"), file("*_aln.html"), file("*.log"), file("*iq*"), file("*mt*") into alignprot_results - file("*iq.treefile") into (amino_rax_plot, amino_repphy) + file("*iq.treefile") into (amino_rax_plot, amino_repphy, amino_treeclust) script: """ @@ -2975,6 +3008,40 @@ if (params.DataCheck || params.Analyze) { } } + if (params.aminoTClust){ + + process AminoType_PhyloClustering { + + label 'norm_cpus' + + publishDir "${params.workingdir}/${params.outdir}/Analyze/Analyses/ASVs/ + + input: + file(tree) from amino_treeclust + + output: + + + script: + """ + TreeCluster.py -i ${tree} ${params.asvTCopp} > ${params.projtag}_AminoType_treeclustering.out + #create headless treeclustering.out + tail -n +2 ${params.projtag}_AminoType_treeclustering.out | sed 's/-1/0/g' > headless.treeout + #summarizing clustering results + awk -F "\t" '{print $2}' | sort | uniq > group.list + g=1 + echo "SequenceID,PhyloGroup" > ${params.projtag}_phylogroup.csv + for x in \$(cat group.list) + do grep "$x" 
headless.out > tmp.tsv + groupID="phyloGroup\${g}" + awk -F "\t" '{print \$1",'\${groupID}'"}' tmp.tsv >> ${params.projtag}_phylogroup.csv + g=\$((\$g+1)) + done + rm tmp.csv + } + + } + process Generate_AminoTypes_Counts_Table { label 'high_cpus' diff --git a/vampirus.config b/vampirus.config index e1341a3..cb03ae3 100644 --- a/vampirus.config +++ b/vampirus.config @@ -51,7 +51,6 @@ params { // Minimum non-merged read length after adapter and primer removal (default = 200) minilen="100" - // Merged read length filtering parameters // Minimum merged read length - reads with lengths greater than minLen and below the specified maximum read length will be used for counts only @@ -83,6 +82,17 @@ params { // Keep any sequences without hits - for yes, set keepnohit to ="true" keepnohit="true" + //Parameters for diamond command for ASV filtering + + // Set minimum percent amino acid similarity for best hit to be counted in taxonomy assignment + filtminID="80" + // Set minimum amino acid alignment length for best hit to be counted in taxonomy assignment + filtminaln="30" + // Set sensitivity parameters for DIAMOND aligner (read more here: https://github.com/bbuchfink/diamond/wiki; default = ultra-sensitive) + filtsensitivity="ultra-sensitive" + // Set the max e-value for best hit to be recorded + filtevalue="0.001" + // ASV clustering parameters // Percent similarity to cluster nucleotide ASV sequences (used when --ncASV is set) @@ -96,24 +106,6 @@ params { // Minimum length of amino acid translation to be considered during protein clustered ASV (pcASV) generation. Recommended to put this at the expected amino acid sequence length based on your maximum read length (e.g. if maxLen="420", then minAA should be 420/3 so 140) minAA="140" - // Tree-based ASV/AminoType clustering parameters - - // - - - - //Parameters for diamond command for filtering - - // Set minimum percent amino acid similarity for best hit to be counted in taxonomy assignment - filtminID="80" - // Set minimum amino acid alignment length for best hit to be counted in taxonomy assignment - filtminaln="30" - // Set sensitivity parameters for DIAMOND aligner (read more here: https://github.com/bbuchfink/diamond/wiki; default = ultra-sensitive) - filtsensitivity="ultra-sensitive" - // Set the max e-value for best hit to be recorded - filtevalue="0.001" - - // Minimum Entropy Decomposition (MED) parameters for clustering (https://merenlab.org/2012/05/11/oligotyping-pipeline-explained/) // If you plan to do MED on ASVs using the option "--asvMED" you can set here the number of entopy peak positions or a comma seperated list of biologically meaningful positons (e.g. 35,122,21) for oligotyping to take into consideration. If you want to use a single specific position, make "asvSingle="true"". 
@@ -123,6 +115,17 @@ params { aminoC="" aminoSingle="false" + // Phylogeny-based ASV/AminoType clustering parameters using the program TreeCluster (https://github.com/niemasd/TreeCluster) + + // Add the "--asvTClust" option to the launch command to run phylogeny-based clustering of ASVs ; Add "--aminoTClust" to launch command for phylogeny-based clustering on AminoTypes + // NOTE: you can't use "--skipPhylogeny" when doing this form of sequence clustering + + // TreeCluster command options for ASV clustering (--asvTClust) -- (Example: "-option1 A -option2 B -option3 C -option4 D") - See TreeCluster paper and github page to determine the best options (a good start is what is below) + asvTCopp="-t 0.045 -max_clade" + + // TreeCluster command options for AminoType clustering (--aminoTClust) -- (Example: "-option1 A -option2 B -option3 C -option4 D") - See TreeCluster paper and github page to determine the best options + aminoTCopp="-t 0.045 -max_clade" + // Counts table generation parameters // Percent similarity to use for ASV/cASV counts table generation with vsearch @@ -169,7 +172,7 @@ params { // Phylogeny analysis parameters - // Color nodes on phylogenetic tree in Analyze report with MED Group information (nodeCol="MED") or taxonomy (nodeCol=TAX) hit. If you would like nodes colored by sequence ID, leave nodeCol="" below. + // Color nodes on phylogenetic tree in Analyze report with MED Group information (nodeCol="MED"), taxonomy (nodeCol=TAX) hit, or TreeCluster Group Information (nodeCol=TC). If you would like nodes colored by sequence ID, leave nodeCol="" below. nodeCol="" // Customs options for IQ-TREE (Example: "-option1 A -option2 B -option3 C -option4 D") @@ -224,6 +227,8 @@ params { aminoMED = false // Filter ASVs filter = false + // Phylogeny-based ASV clustering + asvTClust = false // Skip options // Skip all Read Processing steps skipReadProcessing = false From bb7e42d01376f3cd39ea2f5d33f870e984b341a6 Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Sun, 17 Oct 2021 19:45:16 -0500 Subject: [PATCH 05/47] Added treeclustering info to docs and added outputs for treeclustering processes --- README.md | 2 +- docs/HelpDocumentation.md | 46 ++++++++++++++++++++++++++++++++------- vAMPirus.nf | 13 ++++++----- 3 files changed, 47 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index b27400a..588be48 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ * [Running vAMPirus](#Running-vAMPirus) * [Who to cite](#Who-to-cite) -# New in vAMPirus version 2.0.0 +# New in vAMPirus version 2.0.1 1. Reduced redundancy of processes and the volume of generated result files per full run (Example - read processing only done once if running DataCheck then Analyze). diff --git a/docs/HelpDocumentation.md b/docs/HelpDocumentation.md index 20fb68b..1561b34 100644 --- a/docs/HelpDocumentation.md +++ b/docs/HelpDocumentation.md @@ -21,11 +21,11 @@ The vAMPirus program contains two different pipelines: If you have a feature request or any feedback/questions, feel free to email vAMPirusHelp@gmail.com or you can open an Issue on GitHub. -## Changes in version 2.0.0 +## Changes in version 2.0.1 -1. (EXPERIMENTAL) Added Minimum Entropy Decomposition analysis using the oligotyping program produced by the Meren Lab. This allows for sequence clustering based on sequence positions of interest (biologically meaningful) or top positions with the highest Shannon's Entropy (read more here: https://merenlab.org/software/oligotyping/ ; and below). +1. 
Reduced redundancy of processes and the volume of generated result files per full run (Example - read processing only done once if running DataCheck then Analyze). -2. Added more useful taxonomic classification of sequences leveraging the RVDB annotation database and/or NCBI taxonomy files (read more below). +2. Added further taxonomic classification of sequences using the RVDB annotation database and/or NCBI taxonomy files (see manual for more info). 3. Replaced the used of MAFFT with muscle v5 (Edgar 2021) for more accurate virus gene alignments (see https://www.biorxiv.org/content/10.1101/2021.06.20.449169v1.full). @@ -33,12 +33,13 @@ If you have a feature request or any feedback/questions, feel free to email vAMP 5. ASV filtering - you can now provide a "filter" and "keep" database to remove certain sequences from the analysis -6. Reduced redundancy of processes and the volume of generated result files per full run (Example - read processing only done once if running DataCheck then Analyze). +6. (EXPERIMENTAL) Added Minimum Entropy Decomposition analysis using the oligotyping program produced by the Meren Lab. This allows for sequence clustering based on sequence positions of interest (biologically meaningful) or top positions with the highest Shannon's Entropy (read more here: https://merenlab.org/software/oligotyping/ ; and below). -7. Color nodes on phylogenetic trees based on Taxonomy or Minimum Entropy Decomposition results +7. Phylogeny-based clustering ASV or AminoType sequences with TreeCluster (https://github.com/niemasd/TreeCluster; https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0221068) -8. PCoA plots added to report if NMDS does not converge. +8. Color nodes on phylogenetic trees based on Taxonomy or Minimum Entropy Decomposition results +8. PCoA plots added to Analyze report if NMDS does not converge. ## Who to cite @@ -76,6 +77,8 @@ If you do use vAMPirus for your analyses, please cite the following -> 16. Oligotyping - A. Murat Eren, Gary G. Borisy, Susan M. Huse, Jessica L. Mark Welch (2014). Oligotyping analysis of the human oral microbiome. Proceedings of the National Academy of Sciences Jul 2014, 111 (28) E2875-E2884; DOI: 10.1073/pnas.1409644111 +17. Balaban M, Moshiri N, Mai U, Jia X, Mirarab S (2019). "TreeCluster: Clustering biological sequences using phylogenetic trees." PLoS ONE. 14(8):e0221068. doi:10.1371/journal.pone.0221068 + # Getting started with vAMPirus @@ -1079,7 +1082,7 @@ Here are the options stored within the configuration file: ### Clustering options -Dependining on the virus type/marker gene, ASV-level results can be noisy and to combat this vAMPirus has three different approaches to clustering ASV sequences: +Depending on the virus type/marker gene, ASV-level results can be noisy and to combat this vAMPirus has three different approaches to clustering ASV sequences: 1. AminoTyping - @@ -1186,9 +1189,36 @@ MED related options within the configuration file: aminoC="" aminoSingle="" +## Phylogeny-based clustering of ASV or AminoType sequences with TreeCluster (https://github.com/niemasd/TreeCluster; https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0221068) + +New in version 2.0.1, you can now use the program TreeCluster to cluster ASV or AminoType sequences based on phylogenetic relationship. 
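As a rough illustration of what happens under the hood, here is a minimal stand-alone sketch of a TreeCluster run and of how its output table can be collapsed into a `SequenceID,PhyloGroup` CSV like the one the pipeline writes. The file names (`asv_iq.treefile`, `phylogroup.csv`) are placeholders rather than actual pipeline outputs, and the options are simply the starting values suggested in the configuration file, so adjust both to your own data:

```bash
# Cluster the leaves of an IQ-TREE tree with a 0.045 distance threshold,
# the "max_clade" method, and a 0.5 branch-support cutoff.
TreeCluster.py -i asv_iq.treefile -t 0.045 -m max_clade -s 0.5 > treeclustering.out

# TreeCluster prints "SequenceName<TAB>ClusterNumber" with a header row;
# leaves assigned -1 are singletons. Reformat that into the SequenceID,PhyloGroup
# CSV layout used by the reports (singletons lumped into phyloGroup0 here).
echo "SequenceID,PhyloGroup" > phylogroup.csv
awk -F "\t" -v OFS="," 'NR > 1 {grp = ($2 == -1 ? 0 : $2); print $1, "phyloGroup" grp}' \
    treeclustering.out >> phylogroup.csv
```
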
+ +From the TreeCluster GitHub (https://github.com/niemasd/TreeCluster): + +"TreeCluster is a tool that, given a tree T (Newick format) and a distance threshold t, finds the minimum number of clusters of the leaves of T such that some user-specified constraint is met in each cluster. The user can also specify a branch support threshold s such that no pair of leaves in any cluster can be connected by branches with support less than or equal to s. Note that all leaves given a cluster of -1 are singletons, meaning they did not cluster with any other leaves (i.e., each leaf with a cluster of -1 is in its own cluster). + +TreeCluster was motivated by Cluster Picker. + +The default method is "Max Clade" (see Clustering Methods). There is no explicit default distance threshold, but because Cluster Picker recommends a distance threshold of 0.045 and because the same objective function is optimized by both Cluster Picker and TreeCluster "Max Clade", we currently recommend 0.045 as well." + +If you plan to use this form of clustering, be sure to read through the TreeCluster manuscript and help documentation linked above to use the most appropriate command. + +To signal the use of phylogeny-based clustering, add "--asvTClust" to cluster ASVs and "--aminoTClust" to cluster aminotypes. You will then need to check and edit the TreeClust command you would like to use in the configuration file: + + // Phylogeny-based ASV/AminoType clustering parameters using the program TreeCluster (https://github.com/niemasd/TreeCluster) + + // Add the "--asvTClust" option to the launch command to run phylogeny-based clustering of ASVs ; Add "--aminoTClust" to launch command for phylogeny-based clustering on AminoTypes + // NOTE: you can't use "--skipPhylogeny" when doing this form of sequence clustering + + // TreeCluster command options for ASV clustering (--asvTClust) -- (Example: asvTCopp="-option1 A -option2 B -option3 C -option4 D") - See TreeCluster paper and github page to determine the best options (a good start is what is below) + asvTCopp="-t 0.045 -m max_clade -s 0.5" + + // TreeCluster command options for AminoType clustering (--aminoTClust) -- (Example: aminoTCopp"-option1 A -option2 B -option3 C -option4 D") - See TreeCluster paper and github page to determine the best options + aminoTCopp="-t 0.045 -m max_clade -s 0.5" + ## Counts tables and percent ID matrices -vAMPirus generates nucleotide-based counts tables using vsearch and protein-based counts tables using DIAMOND and a custom bash script. Counts tables and percent ID matrices are always produced for each ASV, AminoType and all cASV fasta files produced. +vAMPirus generates nucleotide-based counts tables using vsearch and protein-based counts tables using DIAMOND and a custom bash script. Counts tables, pairwise distance, and percent ID matrices are always produced for each ASV, AminoType and all cASV fasta files produced. 
Here are the parameters you can edit at lines 61-70: diff --git a/vAMPirus.nf b/vAMPirus.nf index 4331c0d..db939ce 100644 --- a/vAMPirus.nf +++ b/vAMPirus.nf @@ -2293,7 +2293,7 @@ if (params.DataCheck || params.Analyze) { """ } - if (!params.skipPhylogeny) { // need to edit paths + if (!params.skipPhylogeny) { process ASV_Phylogeny { @@ -2345,17 +2345,19 @@ if (params.DataCheck || params.Analyze) { label 'norm_cpus' - publishDir "${params.workingdir}/${params.outdir}/Analyze/Analyses/ASVs/ + publishDir "${params.workingdir}/${params.outdir}/Analyze/Analyses/ASVs/TreeClustering", mode: "copy", overwrite: true input: file(tree) from asv_treeclust output: - + file("*treeclustering*.out") into asvtreeclustering_res + file("*phylogroup.csv") into asv_phylogroupcsv script: """ TreeCluster.py -i ${tree} ${params.asvTCopp} > ${params.projtag}_ASV_treeclustering.out + reeCluster.py -i ${tree} ${params.asvTCopp} > ${params.projtag}_ASV_treeclustering_verbose.out #create headless treeclustering.out tail -n +2 ${params.projtag}_ASV_treeclustering.out | sed 's/-1/0/g' > headless.treeout #summarizing clustering results @@ -3014,13 +3016,14 @@ if (params.DataCheck || params.Analyze) { label 'norm_cpus' - publishDir "${params.workingdir}/${params.outdir}/Analyze/Analyses/ASVs/ + publishDir "${params.workingdir}/${params.outdir}/Analyze/Analyses/AminoTypes/TreeCluster, mode: "copy", overwrite: true input: file(tree) from amino_treeclust output: - + file("*treeclustering*.out") into aminotreeclustering_res + file("*phylogroup.csv") into amino_phylogroupcsv script: """ From 5ed4105c31f053b4e80a738481cc0f9682324127 Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Wed, 20 Oct 2021 08:25:28 -0500 Subject: [PATCH 06/47] added pairwise distance matrices to datacheck report --- vAMPirus.nf | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/vAMPirus.nf b/vAMPirus.nf index db939ce..bdf3aee 100644 --- a/vAMPirus.nf +++ b/vAMPirus.nf @@ -1059,7 +1059,7 @@ if (params.DataCheck || params.Analyze) { output: file("number_per_percentage_nucl.csv") into number_per_percent_nucl_plot - + file("${params.projtag}_ASV_PairwiseDistance.matrix") into asvpdm script: if (params.datacheckntIDlist) { """ @@ -1073,6 +1073,9 @@ if (params.DataCheck || params.Analyze) { done yo=\$(grep -c ">" ${fasta}) echo "1.0,\${yo}" >> number_per_percentage_nucl.csv + clustalo -i ${fasta} --distmat-out=${params.projtag}_distance.matrix --full --force --threads=${task.cpus} + cat ${params.projtag}_distance.matrix | tr " " "," | grep "," >${params.projtag}_ASV_PairwiseDistance.matrix + rm ${params.projtag}_distance.matrix """ } } @@ -1111,6 +1114,8 @@ if (params.DataCheck || params.Analyze) { output: file("number_per_percentage_prot.csv") into number_per_percent_prot_plot file("*aminoacid_pcASV1.0_noTaxonomy.fasta") into amino_med + file("${params.projtag}_AminoType_PairwiseDistance.matrix") into aminopdm + script: // add awk script to count seqs """ @@ -1201,6 +1206,9 @@ if (params.DataCheck || params.Analyze) { tail -\$(( \${yesirr}-1 )) number_per_percentage_protz.csv > number_per_percentage_prot.csv head -1 number_per_percentage_protz.csv >> number_per_percentage_prot.csv rm number_per_percentage_protz.csv + clustalo -i aminoacid_pcASV1.0_noTaxonomy.fasta --distmat-out=${params.projtag}_distance.matrix --full --force --threads=${task.cpus} + cat ${params.projtag}_distance.matrix | tr " " "," | grep "," >${params.projtag}_AminoType_PairwiseDistance.matrix + rm ${params.projtag}_distance.matrix """ } @@ -1473,7 +1481,7 @@ 
if (params.DataCheck || params.Analyze) { } report_dc_in = Channel.create() - fastp_csv_dc.mix( reads_per_sample_preFilt, reads_per_sample_postFilt, prefilt_basefreq, postFilt_basefreq, prefilt_qualityscore, postFilt_qualityscore, prefilt_gccontent, postFilt_gccontent, prefilt_averagequality, postFilt_averagequality, prefilt_length, postFilt_length, number_per_percent_nucl_plot, number_per_percent_prot_plot, amino_entro_csv, amino_entropy, asv_entro_csv, asv_entropy).into(report_dc_in) + fastp_csv_dc.mix( reads_per_sample_preFilt, reads_per_sample_postFilt, prefilt_basefreq, postFilt_basefreq, prefilt_qualityscore, postFilt_qualityscore, prefilt_gccontent, postFilt_gccontent, prefilt_averagequality, postFilt_averagequality, prefilt_length, postFilt_length, number_per_percent_nucl_plot, number_per_percent_prot_plot, amino_entro_csv, amino_entropy, asv_entro_csv, asv_entropy, asvpdm, aminopdm).into(report_dc_in) process Report_DataCheck { From 3651666092562d17c5e03aa76b6895669b3d57ed Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Wed, 20 Oct 2021 08:59:10 -0500 Subject: [PATCH 07/47] added pariwise distance heatmaps to DC report --- bin/vAMPirus_DC_Report.Rmd | 98 +++++++++++++++++++++++++++++++------- 1 file changed, 80 insertions(+), 18 deletions(-) diff --git a/bin/vAMPirus_DC_Report.Rmd b/bin/vAMPirus_DC_Report.Rmd index c986bb5..b4eea2c 100644 --- a/bin/vAMPirus_DC_Report.Rmd +++ b/bin/vAMPirus_DC_Report.Rmd @@ -39,11 +39,9 @@ library(scales) library(cowplot) library(dplyr) library(plotly) -#library(BiocParallel) library(knitr) -library(kableExtra) #install.packages("kableExtra") +library(kableExtra) library(rmarkdown) -#register(MulticoreParam(4)) ``` ```{r colors, include=FALSE} @@ -360,6 +358,12 @@ postlhp <- postlhp %>% layout(yaxis = list(title = "Count")) postlhp <- postlhp %>% layout(xaxis = list(title = "Read Length")) postlhp <- postlhp %>% config(toImageButtonOptions=list(format='svg',filename='ReadsperLen_postfilt', height= 500, width= 800, scale= 1)) postlhp +``` +
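The pairwise distance heatmaps added below read `asv_matrix.txt` and `amino_matrix.txt`, which are the comma-separated Clustal Omega distance matrices produced by the Nextflow processes above. A minimal sketch of that upstream conversion, with placeholder file names standing in for the project-tagged outputs:

```bash
# Full pairwise distance matrix from Clustal Omega; the first line of the output
# holds only the sequence count.
clustalo -i asvs.fasta --distmat-out=distance.matrix --full --force --threads=4

# Turn spaces into commas; keeping only lines that now contain commas drops the
# count line, leaving a plain CSV that read.csv() can load in this report.
tr " " "," < distance.matrix | grep "," > asv_matrix.txt
```
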
+```{bash load_datasets_bash, include=FALSE} +mv *AminoType_PairwiseDistance.matrix ./amino_matrix.txt +mv *_ASV_PairwiseDistance.matrix ./asv_matrix.txt + ```
@@ -383,24 +387,35 @@ nnp NOTE: The "1" on the x-axis represents number of ASVs identified by vsearch
-### Number of pcASVs per clustering percentage - -
- -```{r prot_number, echo=FALSE} -pn=read.csv("number_per_percentage_prot.csv", header=TRUE) -#pn=read.csv("number_per_percentage_prot.csv", header=T) -pnp <- plot_ly(pn, x=pn[,1], y=pn[,2], type="scatter", mode = 'lines+markers', marker=list(color='#088da5', line=list(color = 'black', - width = .1)), hovertemplate = paste('ID%: %{x}','
Number of pcASVs: %{y}','')) -pnp <- pnp %>% layout(yaxis = list(title = "Number of pcASVs")) -pnp <- pnp %>% layout(xaxis = list(title = "Clustering ID %")) -pnp <- pnp %>% config(toImageButtonOptions=list(format='svg',filename='Protclustresults', height= 500, width= 800, scale= 1)) -pnp +

### ASV Pairwise Distance Heatmap

+ +
+
+```{r asvheatmap, echo=FALSE} +simmatrix<- read.csv("asv_matrix.txt", header=FALSE) +rownames(simmatrix) <- simmatrix[,1] +simmatrix <- simmatrix[,-1] +colnames(simmatrix) <-rownames(simmatrix) +cols <- dim(simmatrix)[2] +simmatrix$AA <- rownames(simmatrix) +rval=nrow(simmatrix) +simmatrix2 <- simmatrix %>% + gather(1:rval, key=sequence, value=similarity) +x=reorder(simmatrix2$AA,simmatrix2$similarity) +y=reorder(simmatrix2$sequence,simmatrix2$similarity) +similaritymatrix <- ggplot(simmatrix2, aes(x=x, y=y,fill=similarity))+ + geom_raster()+ + scale_fill_distiller(palette="Spectral")+ + theme(axis.text.x = element_text(angle = 90))+ + theme(axis.title.x=element_blank())+ + theme(axis.title.y=element_blank()) + +heat <- ggplotly(similaritymatrix) +heat <- heat %>% config(toImageButtonOptions=list(format='svg',filename='heatmap', height= 500, width= 800, scale= 1)) +heat ``` -NOTE: The "1" represents the number of AminoTypes which are unique amino acid sequences in your dataset
- ### ASV Shannon Entropy Analysis (https://merenlab.org/2012/05/11/oligotyping-pipeline-explained/)
@@ -435,6 +450,53 @@ paged_table(med_asv_csv, options = list(rows.print = 10)) ```
+### Number of pcASVs per clustering percentage + +
+ +```{r prot_number, echo=FALSE} +pn=read.csv("number_per_percentage_prot.csv", header=TRUE) +#pn=read.csv("number_per_percentage_prot.csv", header=T) +pnp <- plot_ly(pn, x=pn[,1], y=pn[,2], type="scatter", mode = 'lines+markers', marker=list(color='#088da5', line=list(color = 'black', + width = .1)), hovertemplate = paste('ID%: %{x}','
Number of pcASVs: %{y}','')) +pnp <- pnp %>% layout(yaxis = list(title = "Number of pcASVs")) +pnp <- pnp %>% layout(xaxis = list(title = "Clustering ID %")) +pnp <- pnp %>% config(toImageButtonOptions=list(format='svg',filename='Protclustresults', height= 500, width= 800, scale= 1)) +pnp +``` +NOTE: The "1" represents the number of AminoTypes which are unique amino acid sequences (AminoTypes) in your dataset + +
+

### AminoType Pairwise Distance Heatmap

+ +
+
+```{r aminoheatmap, echo=FALSE} +simmatrix<- read.csv("amino_matrix.txt", header=FALSE) +rownames(simmatrix) <- simmatrix[,1] +simmatrix <- simmatrix[,-1] +colnames(simmatrix) <-rownames(simmatrix) +cols <- dim(simmatrix)[2] +simmatrix$AA <- rownames(simmatrix) +rval=nrow(simmatrix) +simmatrix2 <- simmatrix %>% + gather(1:rval, key=sequence, value=similarity) +x=reorder(simmatrix2$AA,simmatrix2$similarity) +y=reorder(simmatrix2$sequence,simmatrix2$similarity) +similaritymatrix <- ggplot(simmatrix2, aes(x=x, y=y,fill=similarity))+ + geom_raster()+ + scale_fill_distiller(palette="Spectral")+ + theme(axis.text.x = element_text(angle = 90))+ + theme(axis.title.x=element_blank())+ + theme(axis.title.y=element_blank()) + +heat <- ggplotly(similaritymatrix) +heat <- heat %>% config(toImageButtonOptions=list(format='svg',filename='heatmap', height= 500, width= 800, scale= 1)) +heat +``` +
+
+ ### AminoTypes Shannon Entropy Analysis (https://merenlab.org/2012/05/11/oligotyping-pipeline-explained/)
From 0965b8714f1d1a62e1b34f120923ee920c7f312f Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Wed, 20 Oct 2021 09:04:05 -0500 Subject: [PATCH 08/47] phylogrouping added to nf --- vAMPirus.nf | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/vAMPirus.nf b/vAMPirus.nf index bdf3aee..536aff3 100644 --- a/vAMPirus.nf +++ b/vAMPirus.nf @@ -2365,7 +2365,7 @@ if (params.DataCheck || params.Analyze) { script: """ TreeCluster.py -i ${tree} ${params.asvTCopp} > ${params.projtag}_ASV_treeclustering.out - reeCluster.py -i ${tree} ${params.asvTCopp} > ${params.projtag}_ASV_treeclustering_verbose.out + TreeCluster.py -i ${tree} ${params.asvTCopp} > ${params.projtag}_ASV_treeclustering_verbose.out #create headless treeclustering.out tail -n +2 ${params.projtag}_ASV_treeclustering.out | sed 's/-1/0/g' > headless.treeout #summarizing clustering results @@ -2379,9 +2379,11 @@ if (params.DataCheck || params.Analyze) { g=\$((\$g+1)) done rm tmp.csv + """ } } + if (params.asvMED) { process ASV_Minimum_Entropy_Decomposition { @@ -3018,13 +3020,13 @@ if (params.DataCheck || params.Analyze) { } } - if (params.aminoTClust){ + if (params.aminoTClust) { process AminoType_PhyloClustering { label 'norm_cpus' - publishDir "${params.workingdir}/${params.outdir}/Analyze/Analyses/AminoTypes/TreeCluster, mode: "copy", overwrite: true + publishDir "${params.workingdir}/${params.outdir}/Analyze/Analyses/AminoTypes/TreeCluster", mode: "copy", overwrite: true input: file(tree) from amino_treeclust @@ -3036,6 +3038,7 @@ if (params.DataCheck || params.Analyze) { script: """ TreeCluster.py -i ${tree} ${params.asvTCopp} > ${params.projtag}_AminoType_treeclustering.out + TreeCluster.py -i ${tree} ${params.asvTCopp} > ${params.projtag}_ASV_treeclustering_verbose.out #create headless treeclustering.out tail -n +2 ${params.projtag}_AminoType_treeclustering.out | sed 's/-1/0/g' > headless.treeout #summarizing clustering results @@ -3049,10 +3052,11 @@ if (params.DataCheck || params.Analyze) { g=\$((\$g+1)) done rm tmp.csv - } - + """ + } } + process Generate_AminoTypes_Counts_Table { label 'high_cpus' @@ -4304,7 +4308,7 @@ if (params.DataCheck || params.Analyze) { */ report_asv = Channel.create() - asv_counts_plots.mix(taxplot_asv, asv_heatmap, nucl_phyl_plot_asv, asvgroupscsv, asvgroupcounts, asv_group_rep_tree, tax_table_asv, tax_nodCol_asv).flatten().buffer(size:9).dump(tag:'asv').into(report_asv) + asv_counts_plots.mix(taxplot_asv, asv_heatmap, nucl_phyl_plot_asv, asvgroupscsv, asvgroupcounts, asv_group_rep_tree, tax_table_asv, tax_nodCol_asv, asv_phylogroupcsv).flatten().buffer(size:10).dump(tag:'asv').into(report_asv) if (params.ncASV) { report_ncasv = Channel.create() @@ -4349,7 +4353,7 @@ if (params.DataCheck || params.Analyze) { if (!params.skipAminoTyping) { report_aminotypes = Channel.create() - aminocounts_plot.mix(taxplot2, aminotype_heatmap, amino_rax_plot, atygroupscsv, amino_group_rep_tree, amino_groupcounts, tax_table_amino, tax_nodCol_amino).flatten().buffer(size:9).dump(tag:'amino').into(report_aminotypes) + aminocounts_plot.mix(taxplot2, aminotype_heatmap, amino_rax_plot, atygroupscsv, amino_group_rep_tree, amino_groupcounts, tax_table_amino, tax_nodCol_amino, amino_phylogroupcsv).flatten().buffer(size:10).dump(tag:'amino').into(report_aminotypes) /* Report_AminoTypes aminocounts_plot -> ${params.projtag}_AminoType_counts.csv From b23e58768e0124344a0ef26d9d9fe26aa93c4ab9 Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Wed, 20 Oct 2021 09:11:06 -0500 Subject: [PATCH 09/47] added 
phylogroup to report --- bin/vAMPirus_Report.Rmd | 581 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 580 insertions(+), 1 deletion(-) diff --git a/bin/vAMPirus_Report.Rmd b/bin/vAMPirus_Report.Rmd index 51904cc..e747313 100644 --- a/bin/vAMPirus_Report.Rmd +++ b/bin/vAMPirus_Report.Rmd @@ -736,7 +736,7 @@ if (params$skipPhylogeny == "true") { p2 <- p1 + geom_point(data = metat2, aes(x = x, y = y, label = id, color = GroupID)) gg = ggplotly(p2, tooltip = c("label","GroupID")) gg <- gg %>% config(toImageButtonOptions=list(format='svg',filename='Tree_MED_colors', height= 500, width= 800, scale= 1)) - gg + gg } else if ((params$skipTaxonomy == "false") && (params$nodeCol == "TAX")) { taxMap=read.csv("quicker_taxbreakdown.csv", header=T, check.names = F) tree=read.tree("tree.txt") @@ -770,6 +770,585 @@ This tree is a maximum likelihood tree made with IQTREE2 and the parameters you


+# Post Phylogeny-based Clustering Analyses +```{bash bash_phy_table, include=FALSE} +if [[ $(ls | grep -wc "asv_medfile.csv") -eq 1 ]];then + awk -F "," '{print $2}' asv_medfile.csv | sort | uniq | sort -g >group.list + for x in $(cat group.list);do + echo "${x}" >> ${x}.col + grep -w "$x" asv_medfile.csv | awk -F "," '{print $1}' | sort >> ${x}.col + done + paste -d "," *.col > asv_medtable.csv + rm *.col +else + echo "Not MED" +fi + +if [[ $(ls | grep -wc "amino_medfile.csv") -eq 1 ]];then + awk -F "," '{print $2}' amino_medfile.csv | sort | awk -F "Group" '{print $2}' | sort -n | uniq > group.list + for x in $(cat group.list);do + gr="Group"${x}"" + echo "${gr}" > ${x}.col + grep -w "$gr" amino_medfile.csv | awk -F "," '{print $1}' | sort >> ${x}.col + done + paste -d "," *.col > amino_medtable.csv + rm *.col +else + echo "Not MED" +fi +``` + +```{r PhyloGroup_table, echo=FALSE} +if (params$asvMED == "true" && params$type == "ASV"){ + #med_group=read.csv("asv_medfile.csv", header = F) + #colnames(med_group) <- c("SequenceID", "Group", "Sequence") + # change name of sequence to med peak or something + #paged_table(med_group,options = list(rows.print = 20)) + med_group=read.csv("asv_medtable.csv", header = T) + knitr::kable(med_group, digits = 2, align = 'c', booktabs = TRUE, caption = "Table: ASV MED Table") %>% + kable_styling(font_size = 12, full_width = F)%>% + scroll_box(width = "100%", height = "100%") +} + +if (params$aminoMED == "true" && params$type == "AminoType"){ + #med_group=read.csv("amino_medfile.csv", header = F) + #colnames(med_group) <- c("SequenceID", "Group", "Sequence") + #paged_table(med_group,options = list(rows.print = 20)) + med_group=read.csv("amino_medtable.csv", header = T) + knitr::kable(med_group, digits = 2, align = 'c', booktabs = TRUE, caption = "Table: Aminotype MED Table") %>% + kable_styling(font_size = 12, full_width = F) %>% + scroll_box(width = "100%", height = "100%") +} +``` + +
+
+
+
+
+ +

## PhyloGroup Relative Abundance Plot

+ +
+
+ +```{r med_plot, echo=FALSE} +if (params$asvMED == "true" && params$type == "ASV"){ + #medgroup=read.csv("uVP_AminoType_Groupingcounts.csv", check.names=FALSE) + medgroup=read.csv("asv_groupcounts.csv", check.names=FALSE) + meddata <- medgroup %>% select(-2) + meddata2 <- aggregate(.~GroupID, meddata, sum) + meddata3 <-as.data.frame(t(meddata2)) + meddata3$sample <- row.names(meddata3) + colnames(meddata3)<- as.matrix(meddata3[1,]) + as.data.frame(meddata3) + meddata3 <- meddata3[-1,] + meddata3 <- meddata3 %>% + rename(sample=GroupID) + meddata3dim <- dim(meddata3) + ##Loading metadata + samples <- read.csv(sample_metadata, header = TRUE) + #samples <- read.csv("rna_virus_meta.csv", header = TRUE) + ##Combining data and metadata + meddata4 <- merge(meddata3, samples, by="sample") + #if "meddata4" removed it does not work + #meddata4 + dim_meddata4 <- dim(meddata4) + dim_samples <- dim(samples) + cols <- dim_meddata4[2]-dim_samples[2]+1 + first <-colnames(meddata4)[2] + last <- colnames(meddata4)[cols] + meddata4[,2:cols] <- lapply(meddata4[,2:cols], as.character) + meddata4[,2:cols] <- lapply(meddata4[,2:cols], as.numeric) + #Calculate total reads per sample + meddata5 <- meddata4%>% + mutate(sum=select(.,2:cols)%>% + apply(1, sum, na.rm=TRUE)) + #meddata5 + ##Filter samples with low reads + nfil=params$minimumCounts + #nfil=1000 + meddata5 <- meddata5 %>% + filter(sum>nfil) + #can cause errors + meddata5dim <-dim(meddata5) + minreads<-min(meddata5$sum) + + dataz <- decostand(meddata5[,2:cols],method="total") #method 'total' normalizes data to sum up to 1 + dataz$sample <- meddata5$sample + metadata <- meddata5[,(cols+1):meddata5dim[2]] + metadata$sample <- meddata5$sample + datay <- merge(dataz, metadata, by="sample") + datalong <- datay %>% + tidyr::gather(first:last, key=hit, value=reads) + ddd <- plot_ly(datalong, x=~sample, y=~reads, color=~hit, colors=mycol, type='bar') + ddd <- ddd %>% layout(barmode = 'stack') + ddd <- ddd %>% layout(legend = list(x=10,y=.5), xaxis=list(title = "Sample"), yaxis=list(title = "Relative abundance")) + ddd <- ddd %>% config(toImageButtonOptions=list(format='svg',filename='Relative_abundance', height= 500, width= 800, scale= 1)) + ddd +} +if (params$aminoMED == "true" && params$type == "AminoType"){ + #medgroup=read.csv("uVP_AminoType_Groupingcounts.csv") + medgroup=read.csv("amino_groupcounts.csv", check.names=FALSE) + meddata <- medgroup %>% select(-2) + meddata2 <- aggregate(.~GroupID, meddata, sum) + meddata3 <-as.data.frame(t(meddata2)) + meddata3$sample <- row.names(meddata3) + colnames(meddata3)<- as.matrix(meddata3[1,]) + as.data.frame(meddata3) + meddata3 <- meddata3[-1,] + meddata3 <- meddata3 %>% + rename(sample=GroupID) + meddata3dim <- dim(meddata3) + ##Loading metadata + samples <- read.csv(sample_metadata, header = TRUE) + ##Combining data and metadata + meddata4 <- merge(meddata3, samples, by="sample") + #if "meddata4" removed it does not work + #meddata4 + dim_meddata4 <- dim(meddata4) + dim_samples <- dim(samples) + cols <- dim_meddata4[2]-dim_samples[2]+1 + first <-colnames(meddata4)[2] + last <- colnames(meddata4)[cols] + meddata4[,2:cols] <- lapply(meddata4[,2:cols], as.character) + meddata4[,2:cols] <- lapply(meddata4[,2:cols], as.numeric) + #Calculate total reads per sample + meddata5 <- meddata4%>% + mutate(sum=select(.,2:cols)%>% + apply(1, sum, na.rm=TRUE)) + #meddata5 + ##Filter samples with low reads + nfil=params$minimumCounts + #nfil=1000 + meddata5 <- meddata5 %>% + filter(sum>nfil) + #can cause errors + meddata5dim 
<-dim(meddata5) + minreads<-min(meddata5$sum) + + dataz <- decostand(meddata5[,2:cols],method="total") #method 'total' normalizes data to sum up to 1 + dataz$sample <- meddata5$sample + metadata <- meddata5[,(cols+1):meddata5dim[2]] + metadata$sample <- meddata5$sample + datay <- merge(dataz, metadata, by="sample") + datalong <- datay %>% + tidyr::gather(first:last, key=hit, value=reads) + ddd <- plot_ly(datalong, x=~sample, y=~reads, color=~hit, colors=mycol, type='bar') + ddd <- ddd %>% layout(barmode = 'stack') + ddd <- ddd %>% layout(legend = list(x=10,y=.5), xaxis=list(title = "Sample"), yaxis=list(title = "Relative abundance")) + ddd <- ddd %>% config(toImageButtonOptions=list(format='svg',filename='Relative_abundance', height= 500, width= 800, scale= 1)) + ddd +} +``` + +
+
+
+
+
+ +

## PhyloGroup Rarefaction Curves

+ +
+
+ +```{r phy_rarefaction, echo=FALSE, cache=FALSE} +if ((params$asvMED == "true" || params$aminoMED == "true") && (params$type == "ASV" || params$type == "AminoType")){ +##Rarefaction curves +rarefaction <- rarecurve(meddata5[,2:cols]) + +##rarefied dataset +raredata <- as.data.frame(rrarefy(meddata5[,2:cols], sample=minreads)) +} +``` + +
+
+
+
+
+
+ +

## PhyloGroup Diversity Analyses Plots

+ +
+
+ +### PhyloGroup Shannon diversity + +
+```{r phy_diversity_analysis1a, echo=FALSE} +if ((params$asvMED == "true" || params$aminoMED == "true") && (params$type == "ASV" || params$type == "AminoType")){ + metadata <- meddata5[,(cols+1):meddata5dim[2]] + metadata$sample <- meddata5$sample + index <-diversity(raredata, index= "shannon") + shannondata5 <- as.data.frame(index) + shannondata5$sample<- meddata5$sample + shannondata5_2 <- merge(shannondata5, metadata, by="sample") + + sh <- plot_ly(shannondata5_2, x=~treatment, y=~index, color=~treatment, colors=mycol, type="box", boxpoints = "all", pointpos = 0, jitter = 0.5) + #sh <- sh %>% layout(title = list(text="Shannon diversty",y=.99)) + sh <- sh %>% layout(legend = list(x=10,y=.5), yaxis=list(title = "Index"), xaxis=list(title = "Treatment")) + sh <- sh %>% config(toImageButtonOptions=list(format='svg',filename='ShannonDiv', height= 500, width= 800, scale= 1)) + sh +} +``` + +```{r phy_diversity_analysis1b, echo=FALSE} +#divided because error +if ((params$asvMED == "true" || params$aminoMED == "true") && (params$type == "ASV" || params$type == "AminoType")){ + if (params$stats == "true" ) { + shannonaov <- aov(index ~ treatment, data= shannondata5_2) + st <- shapiro.test(resid(shannonaov)) + bt <- bartlett.test(index ~ treatment, data= shannondata5_2) + + if (st$p.value > .05 && bt$p.value > .05) { + print("Shapiro Test of normality - data is normal p-value > 0.05") + print(shapiro.test(resid(shannonaov))) + writeLines("\n--------------------------------------------------------------\n") + print("Bartlett Test variance homogeneity - variance is homogeneous p-value > 0.05") + print(bartlett.test(index ~ treatment, data= shannondata5_2)) + writeLines("\n--------------------------------------------------------------\n") + print("ANOVA Results") + print(summary(shannonaov)) + writeLines("\n--------------------------------------------------------------\n") + #Tukey Honest Significant Differences (pairwise comparison) - significant p <.05 + print("Tukey HSD - Pairwise comparison - significant differences indicated by p-value < 0.05") + print(TukeyHSD(shannonaov)) + writeLines("\n--------------------------------------------------------------\n") + } else { + print("Shapiro Test of normality - data is normal if p-value > 0.05") + print(shapiro.test(resid(shannonaov))) + writeLines("\n--------------------------------------------------------------\n") + print("Bartlett Test variance homogeneity - variance is homogeneous if p-value > 0.05") + print(bartlett.test(index ~ treatment, data= shannondata5_2)) + writeLines("\n--------------------------------------------------------------\n") + print("Data either not normal or variance not homogenous") + print("Kruskal-Wallis Test - test significant if p <.05") + #Kruskal-Wallis test - significant p <.05 + mykt <- kruskal.test(index ~ treatment, data= shannondata5_2) + print(mykt) + writeLines("\n--------------------------------------------------------------\n") + if (mykt$p.value < .05) { + #Pairwise comparison + print("Wilcox.test - pairwise comparison") + print(pairwise.wilcox.test(shannondata5_2$index, shannondata5_2$treatment, p.adjust.method = "BH")) + } else { + print("Data not significant. Skipping pairwise comparison") + } + writeLines("\n--------------------------------------------------------------\n") + print("ANOVA - one or more of the assumptions not met, take with a grain of salt.") + print(summary(shannonaov)) + writeLines("\n--------------------------------------------------------------\n") + } + } else { + print("Stats skipped. 
To toggle on use \"--stats run\" in vAMPirus launch command") + } +} +``` +
+
+
+
+ +### PhyloGroup Simpson diversity + +
+```{r phy_diversity_analysis2a, echo=FALSE} +if ((params$asvMED == "true" || params$aminoMED == "true") && (params$type == "ASV" || params$type == "AminoType")){ + index <- diversity(raredata, index= "simpson") + simpsondata5 <- as.data.frame(index) + simpsondata5$sample<- meddata5$sample + simpsondata5_2 <- merge(simpsondata5, metadata, by="sample") + + s <- plot_ly(simpsondata5_2, x=~treatment, y=~index, color=~treatment, colors=mycol, type="box", boxpoints = "all", pointpos = 0, jitter = 0.5) + #s <- s %>% layout(title = list(text="Simpson diversty",y=.99)) + s <- s %>% layout(legend = list(x=10,y=.5), yaxis=list(title = "Index"), xaxis=list(title = "Treatment")) + s <- s %>% config(toImageButtonOptions=list(format='svg',filename='SimpsonDiv', height= 500, width= 800, scale= 1)) + s +} +``` + +```{r phy_diversity_analysis2b, echo=FALSE} +if ((params$asvMED == "true" || params$aminoMED == "true") && (params$type == "ASV" || params$type == "AminoType")){ + if (params$stats == "true" ) { + simpsonaov <- aov(index ~ treatment, data= simpsondata5_2) + st <- shapiro.test(resid(simpsonaov)) + bt <- bartlett.test(index ~ treatment, data= simpsondata5_2) + + if (st$p.value > .05 && bt$p.value > .05) { + print("Shapiro Test of normality - data is normal p-value > 0.05") + print(shapiro.test(resid(simpsonaov))) + writeLines("\n--------------------------------------------------------------\n") + print("Bartlett Test variance homogeneity - variance is homogeneous p-value > 0.05") + print(bartlett.test(index ~ treatment, data= simpsondata5_2)) + writeLines("\n--------------------------------------------------------------\n") + print("ANOVA Results") + print(summary(simpsonaov)) + writeLines("\n--------------------------------------------------------------\n") + #Tukey Honest Significant Differences (pairwise comparison) - significant p <.05 + print("Tukey HSD - Pairwise comparison - significant differences indicated by p-value < 0.05") + print(TukeyHSD(simpsonaov)) + writeLines("\n--------------------------------------------------------------\n") + } else { + print("Shapiro Test of normality - data is normal if p-value > 0.05") + print(shapiro.test(resid(simpsonaov))) + writeLines("\n--------------------------------------------------------------\n") + print("Bartlett Test variance homogeneity - variance is homogeneous if p-value > 0.05") + print(bartlett.test(index ~ treatment, data= simpsondata5_2)) + writeLines("\n--------------------------------------------------------------\n") + print("Data either not normal or variance not homogenous") + print("Kruskal-Wallis Test - test significant if p <.05") + #Kruskal-Wallis test - significant p <.05 + mykt <- kruskal.test(index ~ treatment, data= simpsondata5_2) + print(mykt) + writeLines("\n--------------------------------------------------------------\n") + if (mykt$p.value < .05) { + #Pairwise comparison + print("Wilcox.test - pairwise comparison") + print(pairwise.wilcox.test(simpsondata5_2$index, simpsondata5_2$treatment, p.adjust.method = "BH")) + } else { + print("Data not significant. Skipping pairwise comparison") + } + writeLines("\n--------------------------------------------------------------\n") + print("ANOVA - one or more of the assumptions not met, take with a grain of salt.") + print(summary(simpsonaov)) + writeLines("\n--------------------------------------------------------------\n") + } + } else { + print("Stats skipped. To toggle on use \"--stats run\" in vAMPirus launch command") + } +} +``` +
+
+
+
+ +### PhyloGroup Richness + +
+```{r phy_diversity_analysis3a, echo=FALSE} +if ((params$asvMED == "true" || params$aminoMED == "true") && (params$type == "ASV" || params$type == "AminoType")){ + mind5<-min(meddata5$sum) + index <- rarefy(meddata5[,2:cols], sample=mind5) + rarerichnessdata5 <- as.data.frame(index) + rarerichnessdata5$sample <-meddata5$sample + richdata5_2 <- merge(rarerichnessdata5, metadata, by="sample") + + ri <- plot_ly(richdata5_2, x=~treatment, y=~index, color=~treatment, colors=mycol, type="box", boxpoints = "all", pointpos = 0, jitter = 0.5) + #ri <- ri %>% layout(title = list(text="ASV Richness",y=.99)) + ri <- ri %>% layout(legend = list(x=10,y=.5), yaxis=list(title = "Index"), xaxis=list(title = "Treatment")) + ri <- ri %>% config(toImageButtonOptions=list(format='svg',filename='SpeciesRich', height= 500, width= 800, scale= 1)) + ri +} +``` + +```{r med_diversity_analysis3b, echo=FALSE} +if ((params$asvMED == "true" || params$aminoMED == "true") && (params$type == "ASV" || params$type == "AminoType")){ + if (params$stats == "true" ) { + richaov <- aov(index ~ treatment, data= richdata5_2) + st <- shapiro.test(resid(richaov)) + bt <- bartlett.test(index ~ treatment, data= richdata5_2) + + if (st$p.value > .05 && bt$p.value > .05) { + print("Shapiro Test of normality - data is normal p-value > 0.05") + print(shapiro.test(resid(richaov))) + writeLines("\n--------------------------------------------------------------\n") + print("Bartlett Test variance homogeneity - variance is homogeneous p-value > 0.05") + print(bartlett.test(index ~ treatment, data= richdata5_2)) + writeLines("\n--------------------------------------------------------------\n") + print("ANOVA Results") + print(summary(richaov)) + #Tukey Honest Significant Differences (pairwise comparison) - significant p <.05 + writeLines("\n--------------------------------------------------------------\n") + print("Tukey HSD - Pairwise comparison - significant differences indicated by p-value < 0.05") + print(TukeyHSD(richaov)) + writeLines("\n--------------------------------------------------------------\n") + } else { + print("Shapiro Test of normality - data is normal if p-value > 0.05") + print(shapiro.test(resid(richaov))) + writeLines("\n--------------------------------------------------------------\n") + print("Bartlett Test variance homogeneity - variance is homogeneous if p-value > 0.05") + print(bartlett.test(index ~ treatment, data= richdata5_2)) + writeLines("\n--------------------------------------------------------------\n") + print("Data either not normal or variance not homogenous") + print("Kruskal-Wallis Test - test significant if p <.05") + #Kruskal-Wallis test - significant p <.05 + mykt <- kruskal.test(index ~ treatment, data= richdata5_2) + print(mykt) + writeLines("\n--------------------------------------------------------------\n") + if (mykt$p.value < .05) { + #Pairwise comparison + print("Wilcox.test - pairwise comparison") + print(pairwise.wilcox.test(richdata5_2$index, richdata5_2$treatment, p.adjust.method = "BH")) + } else { + print("Data not significant. Skipping pairwise comparison") + } + writeLines("\n--------------------------------------------------------------\n") + print("ANOVA - one or more of the assumptions not met, take with a grain of salt.") + print(summary(richaov)) + writeLines("\n--------------------------------------------------------------\n") + } + } else { + print("Stats skipped. To toggle on use \"--stats run\" in vAMPirus launch command") + } +} +``` +
+
+
+
+
+ +

## PhyloGroup Distance To Centroid

+
+```{r med_distance2a, echo=FALSE} +if ((params$asvMED == "true" || params$aminoMED == "true") && (params$type == "ASV" || params$type == "AminoType")){ + ##Distance + intermediate <- raredata + bray.distance <- vegdist(sqrt(intermediate), method="bray") + + ##Dispersion + disper <- betadisper(bray.distance, group = metadata$treatment, type="centroid") + df <- data.frame(Distance_to_centroid=disper$distances,Group=disper$group) + df$sample <- meddata5$sample + df2 <- merge(df, metadata, by="sample") + + cen <- plot_ly(df2, x=~treatment, y=~Distance_to_centroid, color=~treatment, colors=mycol, type="box", boxpoints = "all", pointpos = 0, jitter = 0.5) + #cen <- cen %>% layout(title = list(text="Distance to centroid",y=.99)) + cen <- cen %>% layout(legend = list(x=10,y=.5), yaxis=list(title = "Distance"), xaxis=list(title = "Treatment")) + cen <- cen %>% config(toImageButtonOptions=list(format='svg',filename='Dispersion', height= 500, width= 800, scale= 1)) + cen +} +``` +```{r med_distance2b, echo=FALSE} +if ((params$asvMED == "true" || params$aminoMED == "true") && (params$type == "ASV" || params$type == "AminoType")){ + if (params$stats == "true" ) { + adn <- adonis(bray.distance~meddata5$treatment) + adn + } else { + print("Stats skipped. To toggle on use \"--stats run\" in vAMPirus launch command") + } +} +``` +
+
+
+
+
+ +

  NMDS Plots

+ +
+ +### PhyloGroup 2D NMDS + +
+```{r med_nmds2d, echo=FALSE} +if ((params$asvMED == "true" || params$aminoMED == "true") && (params$type == "ASV" || params$type == "AminoType")){ + ##NMDS + datax <- decostand(raredata,method="total") #method 'total' normalizes data to sum up to 1 --data5[,2:cols] + + MDS <- metaMDS(sqrt(datax), + distance = "bray",autotransform = FALSE, + k = 2, + maxit = 999, + trymax = params$try, + wascores = TRUE) + + if (MDS$converged == "TRUE") { + + data.scores <- as.data.frame(scores(MDS)) + data.scores$sample <- meddata5$sample + data.scores.2 <- merge(data.scores, metadata, by="sample") + + p <- ggplot(data.scores.2, aes(x=NMDS1, y=NMDS2,color=treatment))+ + geom_point(size=2)+ + theme_classic()+ + theme(legend.title = element_blank()) + + #fff <- ggplotly(p) + #fff <- fff %>% layout(legend=list(y=.5)) + #fff + + fff <-plot_ly(data.scores.2, x=~NMDS1, y=~NMDS2, color=~treatment, colors=mycol, text = ~paste("Sample: ", sample)) + fff <- fff %>% layout(legend=list(y=.5)) + fff <- fff %>% config(toImageButtonOptions=list(format='svg',filename='2Dnmds', height= 500, width= 800, scale= 1)) + fff + + } else { + print("NMDS did not converge. Printing PCoA") + #calculate bray curtis distance + bcdist <- vegdist(datax, "bray") + res <- pcoa(bcdist) + #res$values + #biplot(res) #to make the boring 2D PCoA + comp <- as.data.frame(res$vectors) + comp$sample <- data5$sample + comp2 <- merge(comp, metadata, by="sample") + fig <- plot_ly(comp2, x = ~Axis.1, y = ~Axis.2, color= ~treatment, text = ~paste("Sample: ", sample,"
Treatment: ",treatment)) + fig <- fig %>% layout(xaxis = list(title = "PCoA 1"), + yaxis = list(title = "PCoA 2")) + fig <- fig %>% layout(legend=list(y=.5)) + fig <- fig %>% config(toImageButtonOptions=list(format='svg',filename='2D_PostMED_PCoA', height= 500, width= 800, scale= 1)) + fig + } +} +``` + +
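Aside: the design choice in this chunk is to try non-metric ordination first and only fall back to a metric PCoA when `metaMDS()` fails to converge. A condensed sketch of that branch, assuming the vegan and ape packages and an invented abundance matrix:

```r
# Condensed sketch of the "NMDS first, PCoA fallback" branch.
library(vegan)
library(ape)

set.seed(1)
counts <- decostand(matrix(rpois(8 * 20, lambda = 5), nrow = 8), method = "total")  # rows sum to 1

mds <- metaMDS(sqrt(counts), distance = "bray", k = 2,
               autotransform = FALSE, maxit = 999, trymax = 50, wascores = TRUE)

if (isTRUE(mds$converged)) {
  ord <- as.data.frame(scores(mds, display = "sites"))   # NMDS1 / NMDS2 per sample
} else {
  pco <- pcoa(vegdist(counts, method = "bray"))          # metric ordination when NMDS stalls
  ord <- as.data.frame(pco$vectors[, 1:2])               # Axis.1 / Axis.2 per sample
}
head(ord)
```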
+
+
+
+ +### PhyloGroup 3D NMDS + +
+ +```{r med_nmds3d, echo=FALSE} +if ((params$asvMED == "true" || params$aminoMED == "true") && (params$type == "ASV" || params$type == "AminoType")){ + MDS3 <- metaMDS(sqrt(datax), + distance = "bray",autotransform = FALSE, + k = 3, + maxit = 999, + trymax = params$try, + wascores = TRUE) + + if (MDS3$converged == "TRUE") { + + data.scores3 <- as.data.frame(scores(MDS3)) + data.scores3$sample <- meddata5$sample + data.scores.3 <- merge(data.scores3, metadata, by="sample") + p3d <- plot_ly(data.scores.3, x=~NMDS1, y=~NMDS2, z=~NMDS3, text=~paste("Sample: ", sample), + color=~treatment, colors=mycol, + mode = 'markers', symbol = ~treatment, symbols = c('square','circle'), + marker = list(opacity = .8,line=list(color = 'darkblue',width = 1)) + ) + p3d <- p3d %>% layout(legend=list(y=.5)) + p3d <- p3d %>% config(toImageButtonOptions=list(format='svg',filename='3Dnmds', height= 500, width= 800, scale= 1)) + p3d + + } else { + print("NMDS did not converge. Printing PCoA") + #calculate bray curtis distance + bcdist <- vegdist(datax, "bray") + res <- pcoa(bcdist) + comp <- as.data.frame(res$vectors) + comp$sample <- data5$sample + comp2 <- merge(comp, metadata, by="sample") + fig <- plot_ly(comp2, x = ~Axis.1, y = ~Axis.2, z = ~Axis.3, color= ~treatment, text = ~paste("Sample: ", sample,"
Treatment: ",treatment)) + fig <- fig %>% layout(xaxis = list(title = "PCoA 1"), + yaxis = list(title = "PCoA 2"), + zaxis = list(title = "PCoA 3")) + fig <- fig %>% layout(legend=list(y=.5)) + fig <- fig %>% config(toImageButtonOptions=list(format='svg',filename='3D_PCoA', height= 500, width= 800, scale= 1)) + fig + } +} +``` +

From 5b09710754a4a061c8289e7690a8da3976f40d82 Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Wed, 20 Oct 2021 09:37:53 -0500 Subject: [PATCH 10/47] fixed values --- vAMPirus.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vAMPirus.nf b/vAMPirus.nf index 536aff3..a448aad 100644 --- a/vAMPirus.nf +++ b/vAMPirus.nf @@ -2369,11 +2369,11 @@ if (params.DataCheck || params.Analyze) { #create headless treeclustering.out tail -n +2 ${params.projtag}_ASV_treeclustering.out | sed 's/-1/0/g' > headless.treeout #summarizing clustering results - awk -F "\t" '{print $2}' | sort | uniq > group.list + awk -F "\t" '{print \$2}' | sort | uniq > group.list g=1 echo "SequenceID,PhyloGroup" > ${params.projtag}_phylogroup.csv for x in \$(cat group.list) - do grep "$x" headless.out > tmp.tsv + do grep "\$x" headless.out > tmp.tsv groupID="phyloGroup\${g}" awk -F "\t" '{print \$1",'\${groupID}'"}' tmp.tsv >> ${params.projtag}_phylogroup.csv g=\$((\$g+1)) @@ -3042,11 +3042,11 @@ if (params.DataCheck || params.Analyze) { #create headless treeclustering.out tail -n +2 ${params.projtag}_AminoType_treeclustering.out | sed 's/-1/0/g' > headless.treeout #summarizing clustering results - awk -F "\t" '{print $2}' | sort | uniq > group.list + awk -F "\t" '{print \$2}' | sort | uniq > group.list g=1 echo "SequenceID,PhyloGroup" > ${params.projtag}_phylogroup.csv for x in \$(cat group.list) - do grep "$x" headless.out > tmp.tsv + do grep "\$x" headless.out > tmp.tsv groupID="phyloGroup\${g}" awk -F "\t" '{print \$1",'\${groupID}'"}' tmp.tsv >> ${params.projtag}_phylogroup.csv g=\$((\$g+1)) From 97a161bdd13b9e77fc1024ad251a0bd6e78916e0 Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Wed, 20 Oct 2021 09:39:42 -0500 Subject: [PATCH 11/47] fixed parameter --- vAMPirus.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vAMPirus.nf b/vAMPirus.nf index a448aad..a8b2b93 100644 --- a/vAMPirus.nf +++ b/vAMPirus.nf @@ -678,7 +678,7 @@ if (params.DataCheck || params.Analyze) { echo ${sample_id} fastp -i ${reads[0]} -I ${reads[1]} -o left-${sample_id}.filter.fq -O right-${sample_id}.filter.fq --detect_adapter_for_pe \ - --average_qual ${params.avQ} -n ${params.nM} -c --overrepresentation_analysis --html ${sample_id}.fastp.html --json ${sample_id}.fastp.json --thread ${task.cpus} \ + --average_qual ${params.avQ} -n ${params.mN} -c --overrepresentation_analysis --html ${sample_id}.fastp.html --json ${sample_id}.fastp.json --thread ${task.cpus} \ --report_title ${sample_id} bash get_readstats.sh ${sample_id}.fastp.json From a46d681c9f4f60d52734d47bc7b6203a0aff7f0a Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Wed, 20 Oct 2021 09:52:56 -0500 Subject: [PATCH 12/47] fixed error in pairwisedistance --- vAMPirus.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vAMPirus.nf b/vAMPirus.nf index a8b2b93..a0af158 100644 --- a/vAMPirus.nf +++ b/vAMPirus.nf @@ -1206,7 +1206,7 @@ if (params.DataCheck || params.Analyze) { tail -\$(( \${yesirr}-1 )) number_per_percentage_protz.csv > number_per_percentage_prot.csv head -1 number_per_percentage_protz.csv >> number_per_percentage_prot.csv rm number_per_percentage_protz.csv - clustalo -i aminoacid_pcASV1.0_noTaxonomy.fasta --distmat-out=${params.projtag}_distance.matrix --full --force --threads=${task.cpus} + clustalo -i *aminoacid_pcASV1.0_noTaxonomy.fasta --distmat-out=${params.projtag}_distance.matrix --full --force --threads=${task.cpus} cat ${params.projtag}_distance.matrix | tr " " "," | grep "," 
>${params.projtag}_AminoType_PairwiseDistance.matrix rm ${params.projtag}_distance.matrix """ From a05432aec490ebd1f9df5e3d811f7faf12b7c263 Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Wed, 20 Oct 2021 10:01:26 -0500 Subject: [PATCH 13/47] edit --- bin/vAMPirus_DC_Report.Rmd | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/bin/vAMPirus_DC_Report.Rmd b/bin/vAMPirus_DC_Report.Rmd index b4eea2c..dc11d8b 100644 --- a/bin/vAMPirus_DC_Report.Rmd +++ b/bin/vAMPirus_DC_Report.Rmd @@ -400,10 +400,10 @@ cols <- dim(simmatrix)[2] simmatrix$AA <- rownames(simmatrix) rval=nrow(simmatrix) simmatrix2 <- simmatrix %>% - gather(1:rval, key=sequence, value=similarity) -x=reorder(simmatrix2$AA,simmatrix2$similarity) -y=reorder(simmatrix2$sequence,simmatrix2$similarity) -similaritymatrix <- ggplot(simmatrix2, aes(x=x, y=y,fill=similarity))+ + gather(1:rval, key=sequence, value=Distance) +x=reorder(simmatrix2$AA,simmatrix2$Distance) +y=reorder(simmatrix2$sequence,simmatrix2$Distance) +similaritymatrix <- ggplot(simmatrix2, aes(x=x, y=y,fill=Distance))+ geom_raster()+ scale_fill_distiller(palette="Spectral")+ theme(axis.text.x = element_text(angle = 90))+ @@ -480,10 +480,10 @@ cols <- dim(simmatrix)[2] simmatrix$AA <- rownames(simmatrix) rval=nrow(simmatrix) simmatrix2 <- simmatrix %>% - gather(1:rval, key=sequence, value=similarity) -x=reorder(simmatrix2$AA,simmatrix2$similarity) -y=reorder(simmatrix2$sequence,simmatrix2$similarity) -similaritymatrix <- ggplot(simmatrix2, aes(x=x, y=y,fill=similarity))+ + gather(1:rval, key=sequence, value=Distance) +x=reorder(simmatrix2$AA,simmatrix2$Distance) +y=reorder(simmatrix2$sequence,simmatrix2$Distance) +similaritymatrix <- ggplot(simmatrix2, aes(x=x, y=y,fill=Distance))+ geom_raster()+ scale_fill_distiller(palette="Spectral")+ theme(axis.text.x = element_text(angle = 90))+ From ee88eedfd53eef3e4e1919f7049ba11f89a21db1 Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Wed, 20 Oct 2021 10:19:24 -0500 Subject: [PATCH 14/47] fixed phyloclusteing commands --- example_data/conf/test.config | 2 ++ vAMPirus.nf | 4 ++++ vampirus.config | 2 ++ 3 files changed, 8 insertions(+) diff --git a/example_data/conf/test.config b/example_data/conf/test.config index cbc63e1..660912b 100644 --- a/example_data/conf/test.config +++ b/example_data/conf/test.config @@ -24,4 +24,6 @@ params { stats = true aminoMED = true asvMED = true + asvTClust = true + aminoTClust = true } diff --git a/vAMPirus.nf b/vAMPirus.nf index a0af158..371ec61 100644 --- a/vAMPirus.nf +++ b/vAMPirus.nf @@ -2382,6 +2382,8 @@ if (params.DataCheck || params.Analyze) { """ } + } else { + asv_phylogroupcsv = Channel.empty() } if (params.asvMED) { @@ -3054,6 +3056,8 @@ if (params.DataCheck || params.Analyze) { rm tmp.csv """ } + } else { + amino_phylogroupcsv = Channel.empty() } diff --git a/vampirus.config b/vampirus.config index cb03ae3..8a0bd04 100644 --- a/vampirus.config +++ b/vampirus.config @@ -229,6 +229,8 @@ params { filter = false // Phylogeny-based ASV clustering asvTClust = false + // Phylogeny-based AminoType clustering + aminoTClust = false // Skip options // Skip all Read Processing steps skipReadProcessing = false From 1f55b193c59caa44ba11380791627aa5e7fd3b2d Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Wed, 20 Oct 2021 10:28:21 -0500 Subject: [PATCH 15/47] fixed treeclustering options --- vampirus.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vampirus.config b/vampirus.config index 8a0bd04..3e854a3 100644 --- a/vampirus.config +++ 
b/vampirus.config @@ -121,10 +121,10 @@ params { // NOTE: you can't use "--skipPhylogeny" when doing this form of sequence clustering // TreeCluster command options for ASV clustering (--asvTClust) -- (Example: "-option1 A -option2 B -option3 C -option4 D") - See TreeCluster paper and github page to determine the best options (a good start is what is below) - asvTCopp="-t 0.045 -max_clade" + asvTCopp="-t 0.045 -m max_clade" // TreeCluster command options for AminoType clustering (--aminoTClust) -- (Example: "-option1 A -option2 B -option3 C -option4 D") - See TreeCluster paper and github page to determine the best options - aminoTCopp="-t 0.045 -max_clade" + aminoTCopp="-t 0.045 -m max_clade" // Counts table generation parameters From 33fe9c21fb97d1d88b1c6955ccabccd6b3189818 Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Wed, 20 Oct 2021 10:31:58 -0500 Subject: [PATCH 16/47] fixe treecluster --- vAMPirus.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vAMPirus.nf b/vAMPirus.nf index 371ec61..1540574 100644 --- a/vAMPirus.nf +++ b/vAMPirus.nf @@ -2369,13 +2369,13 @@ if (params.DataCheck || params.Analyze) { #create headless treeclustering.out tail -n +2 ${params.projtag}_ASV_treeclustering.out | sed 's/-1/0/g' > headless.treeout #summarizing clustering results - awk -F "\t" '{print \$2}' | sort | uniq > group.list + awk -F "\\t" '{print \$2}' | sort | uniq > group.list g=1 echo "SequenceID,PhyloGroup" > ${params.projtag}_phylogroup.csv for x in \$(cat group.list) do grep "\$x" headless.out > tmp.tsv groupID="phyloGroup\${g}" - awk -F "\t" '{print \$1",'\${groupID}'"}' tmp.tsv >> ${params.projtag}_phylogroup.csv + awk -F "\\t" '{print \$1",'\${groupID}'"}' tmp.tsv >> ${params.projtag}_phylogroup.csv g=\$((\$g+1)) done rm tmp.csv @@ -3044,13 +3044,13 @@ if (params.DataCheck || params.Analyze) { #create headless treeclustering.out tail -n +2 ${params.projtag}_AminoType_treeclustering.out | sed 's/-1/0/g' > headless.treeout #summarizing clustering results - awk -F "\t" '{print \$2}' | sort | uniq > group.list + awk -F "\\t" '{print \$2}' | sort | uniq > group.list g=1 echo "SequenceID,PhyloGroup" > ${params.projtag}_phylogroup.csv for x in \$(cat group.list) do grep "\$x" headless.out > tmp.tsv groupID="phyloGroup\${g}" - awk -F "\t" '{print \$1",'\${groupID}'"}' tmp.tsv >> ${params.projtag}_phylogroup.csv + awk -F "\\t" '{print \$1",'\${groupID}'"}' tmp.tsv >> ${params.projtag}_phylogroup.csv g=\$((\$g+1)) done rm tmp.csv From ad309d42d25bf8636615ab2d3ce453324f52e0f2 Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Wed, 20 Oct 2021 10:40:33 -0500 Subject: [PATCH 17/47] fixed treeclusteiing process --- vAMPirus.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vAMPirus.nf b/vAMPirus.nf index 1540574..42f0258 100644 --- a/vAMPirus.nf +++ b/vAMPirus.nf @@ -2369,7 +2369,7 @@ if (params.DataCheck || params.Analyze) { #create headless treeclustering.out tail -n +2 ${params.projtag}_ASV_treeclustering.out | sed 's/-1/0/g' > headless.treeout #summarizing clustering results - awk -F "\\t" '{print \$2}' | sort | uniq > group.list + awk -F "\\t" '{print \$2}' headless.treeout | sort | uniq > group.list g=1 echo "SequenceID,PhyloGroup" > ${params.projtag}_phylogroup.csv for x in \$(cat group.list) @@ -3044,7 +3044,7 @@ if (params.DataCheck || params.Analyze) { #create headless treeclustering.out tail -n +2 ${params.projtag}_AminoType_treeclustering.out | sed 's/-1/0/g' > headless.treeout #summarizing clustering results - awk -F "\\t" '{print \$2}' | sort 
| uniq > group.list + awk -F "\\t" '{print \$2}' headless.treeout | sort | uniq > group.list g=1 echo "SequenceID,PhyloGroup" > ${params.projtag}_phylogroup.csv for x in \$(cat group.list) From 6e9cb6269a67c5f35a97a5e17c3e2a5eda1f7f8f Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Wed, 20 Oct 2021 10:42:05 -0500 Subject: [PATCH 18/47] fix --- vAMPirus.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vAMPirus.nf b/vAMPirus.nf index 42f0258..7818d42 100644 --- a/vAMPirus.nf +++ b/vAMPirus.nf @@ -2373,7 +2373,7 @@ if (params.DataCheck || params.Analyze) { g=1 echo "SequenceID,PhyloGroup" > ${params.projtag}_phylogroup.csv for x in \$(cat group.list) - do grep "\$x" headless.out > tmp.tsv + do grep "\$x" headless.treeout > tmp.tsv groupID="phyloGroup\${g}" awk -F "\\t" '{print \$1",'\${groupID}'"}' tmp.tsv >> ${params.projtag}_phylogroup.csv g=\$((\$g+1)) @@ -3048,7 +3048,7 @@ if (params.DataCheck || params.Analyze) { g=1 echo "SequenceID,PhyloGroup" > ${params.projtag}_phylogroup.csv for x in \$(cat group.list) - do grep "\$x" headless.out > tmp.tsv + do grep "\$x" headless.treeout > tmp.tsv groupID="phyloGroup\${g}" awk -F "\\t" '{print \$1",'\${groupID}'"}' tmp.tsv >> ${params.projtag}_phylogroup.csv g=\$((\$g+1)) From f1f1d3273efa05bcd9bcede0159f810ae8d0c8b1 Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Wed, 20 Oct 2021 10:43:18 -0500 Subject: [PATCH 19/47] fixed remove command --- vAMPirus.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vAMPirus.nf b/vAMPirus.nf index 7818d42..3e74a6b 100644 --- a/vAMPirus.nf +++ b/vAMPirus.nf @@ -2378,7 +2378,7 @@ if (params.DataCheck || params.Analyze) { awk -F "\\t" '{print \$1",'\${groupID}'"}' tmp.tsv >> ${params.projtag}_phylogroup.csv g=\$((\$g+1)) done - rm tmp.csv + rm tmp.tsv """ } @@ -3053,7 +3053,7 @@ if (params.DataCheck || params.Analyze) { awk -F "\\t" '{print \$1",'\${groupID}'"}' tmp.tsv >> ${params.projtag}_phylogroup.csv g=\$((\$g+1)) done - rm tmp.csv + rm tmp.tsv """ } } else { From 908a53487de3aa1dfd592d689719fa59a3e4e802 Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Wed, 20 Oct 2021 11:41:59 -0500 Subject: [PATCH 20/47] updated ymlfile --- vampirus_env.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vampirus_env.yml b/vampirus_env.yml index 868a0ef..ccdcbf3 100644 --- a/vampirus_env.yml +++ b/vampirus_env.yml @@ -32,10 +32,10 @@ dependencies: - r-tidyverse=1.3.0 - r-cowplot=1.0.0 - r-scales=1.1.1 - - bioconductor-ggtree=2.2.1 - - bioconductor-biocparallel=1.24.0 + - bioconductor-ggtree + - r-devtools - pigz=2.4 - - r-biocmanager=1.30.10 + - r-biocmanager - pip: - oligotyping - treecluster From b26f3cff8d1c7765c1ece13cd8d290b26bb6f025 Mon Sep 17 00:00:00 2001 From: "@Aveglia" Date: Wed, 20 Oct 2021 15:36:07 -0500 Subject: [PATCH 21/47] revised treeclustering processes --- bin/vAMPirus_Report.Rmd | 2 ++ vAMPirus.nf | 62 +++++++++++++++++++++++++---------------- 2 files changed, 40 insertions(+), 24 deletions(-) diff --git a/bin/vAMPirus_Report.Rmd b/bin/vAMPirus_Report.Rmd index e747313..49bbfe2 100644 --- a/bin/vAMPirus_Report.Rmd +++ b/bin/vAMPirus_Report.Rmd @@ -18,6 +18,8 @@ params: aminoMED: !r commandArgs(trailingOnly=T)[12] type: !r commandArgs(trailingOnly=T)[13] nodeCol: !r commandArgs(trailingOnly=T)[14] + asvTClust: !r commandArgs(trailingOnly=T)[15] + aminoTClust: !r commandArgs(trailingOnly=T)[16] ---
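Aside: the two new `params` entries follow the report's existing convention of filling each YAML field from a positional trailing argument at render time. A hedged sketch of how such a call could be wired up; the command line, argument order, and defaults shown here are illustrative assumptions, not the pipeline's actual invocation.

```r
# Hypothetical wiring of the report parameters; the real launch command lives in vAMPirus.nf.
# From the shell, something along the lines of:
#   Rscript -e 'rmarkdown::render("vAMPirus_Report.Rmd")' <arg1> ... <arg14> true false
# leaves the trailing arguments visible to commandArgs() inside the YAML header.
args <- commandArgs(trailingOnly = TRUE)
asvTClust   <- if (length(args) >= 15) args[15] else "false"   # toggles ASV phylogroup sections
aminoTClust <- if (length(args) >= 16) args[16] else "false"   # toggles AminoType phylogroup sections
```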