labgem · jpjarnoux · Jun 10, 2024 · Mar 28, 2024 · Mar 28, 2024 · Mar 28, 2024
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -89,7 +89,7 @@ jobs:
         ppanggolin rarefaction --output stepbystep -f -p stepbystep/pangenome.h5 --depth 5 --min 1 --max 50 -ms 10 -fd -ck 30 -K 3 --soft_core 0.9 -se $RANDOM
         ppanggolin draw -p stepbystep/pangenome.h5 --tile_plot --nocloud --soft_core 0.92 --ucurve --output stepbystep -f
         ppanggolin rgp -p stepbystep/pangenome.h5 --persistent_penalty 2 --variable_gain 1 --min_score 3 --dup_margin 0.05
-        ppanggolin spot -p stepbystep/pangenome.h5 --spot_graph --overlapping_match 2 --set_size 3 --exact_match_size 1
+        ppanggolin spot -p stepbystep/pangenome.h5 --output stepbystep --spot_graph --overlapping_match 2 --set_size 3 --exact_match_size 1 -f
         ppanggolin draw -p stepbystep/pangenome.h5 --draw_spots -o stepbystep -f
         ppanggolin module -p stepbystep/pangenome.h5 --transitive 4 --size 3 --jaccard 0.86 --dup_margin 0.05
         ppanggolin write_pangenome -p stepbystep/pangenome.h5 --output stepbystep -f --soft_core 0.9 --dup_margin 0.06  --gexf --light_gexf --csv --Rtab --stats --partitions --compress --json --spots --regions --borders --families_tsv --cpu 1 
@@ -100,6 +100,7 @@ jobs:
         ppanggolin fasta -p stepbystep/pangenome.h5 --output stepbystep -f --prot_families module_0
         ppanggolin fasta -p stepbystep/pangenome.h5 --output stepbystep -f --prot_families core
         ppanggolin fasta -p stepbystep/pangenome.h5 --output stepbystep -f --gene_families module_0 --genes module_0
+        ppanggolin fasta -p stepbystep/pangenome.h5 --output stepbystep -f --proteins cloud --cpu $NUM_CPUS --keep_tmp 
 
         ppanggolin draw -p stepbystep/pangenome.h5 --draw_spots --spots all -o stepbystep -f
         ppanggolin metrics -p stepbystep/pangenome.h5 --genome_fluidity --no_print_info --recompute_metrics --log metrics.log

diff --git a/docs/user/writeFasta.md b/docs/user/writeFasta.md
@@ -18,7 +18,10 @@ When using the `softcore` filter, the `--soft_core` option can be used to modify
 
 ## Genes
 
-This option can be used to write the nucleotide CDS sequences. It can be used as such, to write all of the genes of the pangenome for example:
+### Nucleotide sequences
+
+With the `--genes partition` option PPanGGOLiN will write the nucleotide CDS sequences for the given partition.
+It can be used as such, to write all the genes of the pangenome for example:
 
 ```bash
 ppanggolin fasta -p pangenome.h5 --output MY_GENES --genes all
@@ -30,34 +33,72 @@ Or to write only the persistent genes:
 ppanggolin fasta -p pangenome.h5 --output MY_GENES --genes persistent
 ```
 
+### Protein sequences
+
+With the `--proteins partition` option PPanGGOLiN will write the protein sequences of the genes for the given partition.
+It can be used as such, to write the protein sequences of all the genes of the pangenome for example:
 
-## Protein families
+```bash
+ppanggolin fasta -p pangenome.h5 --output MY_GENES --proteins all
+```
 
-This option can be used to write the protein sequences of the representative sequences for each family. It can be used as such for all families:
+Or to write only the protein sequences of the cloud genes:
+
+```bash
+ppanggolin fasta -p pangenome.h5 --output MY_GENES --proteins cloud
+```
+
+To translate the gene sequences, PPanGGOLiN uses the [MMSeqs2](https://github.com/soedinglab/MMseqs2) `translatenucs` command. 
+So for this option you can specify multiple threads with `--cpu`. 
+You can also specify the translation table to use with `--translate_table`. 
+Finally, you can use the `--keep_tmp` option to keep the temporary directory containing the [MMSeqs2](https://github.com/soedinglab/MMseqs2) database; the location of this directory can be set with `--tmpdir`.
+
+## Gene families
+
+### Protein sequences
+
+With the `--prot_families partition` option PPanGGOLiN will write the protein sequences of the representative gene for each family for the given partition. 
+It can be used as such for all families:
 
 ```bash
 ppanggolin fasta -p pangenome.h5 --output MY_PROT --prot_families all
 ```
 
-or for all of the shell families for example:
+Or for all the shell families for example:
 
 ```bash
 ppanggolin fasta -p pangenome.h5 --output MY_PROT --prot_families shell
 ```
 
+### Nucleotide sequences
 
-## Gene families
-
-This option can be used to write the gene sequences of the representative sequences for each family. It can be used as such:
+With the `--gene_families partition` option PPanGGOLiN will write the nucleotide sequences of the representative gene for each family for the given partition. 
+It can be used as such for all families:
 
 ```bash
 ppanggolin fasta -p pangenome.h5 --output MY_GENES_FAMILIES --gene_families all
 ```
 
-or for the cloud families for example:
+Or for the core families for example:
+
+```bash
+ppanggolin fasta -p pangenome.h5 --output MY_GENES_FAMILIES --gene_families core
+```
+
+
+## Modules
+All the preceding commands accept a module as partition.
+
+So you can write the protein sequences for the families in module_X as such:
+
+```bash
+ppanggolin fasta -p pangenome.h5 --output MY_REGIONS --prot_families module_X
+```
+
+Or the nucleotide sequences of all genes in module_X:
 
 ```bash
-ppanggolin fasta -p pangenome.h5 --output MY_GENES_FAMILIES --gene_families cloud
+ppanggolin fasta -p pangenome.h5 --output MY_REGIONS --genes module_X
 ```
 
 ## Regions
@@ -73,4 +114,4 @@ It can be used as such:
 
 ```bash
 ppanggolin fasta -p pangenome.h5 --output MY_REGIONS --regions all --fasta genomes.fasta.list
-```
+```