Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make github CI workflow faster #225

Merged
merged 4 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 39 additions & 19 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ on:
# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:

env:
NUM_CPUS: 1

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
test:
Expand All @@ -16,8 +19,25 @@ jobs:
strategy:
matrix:
os: ['ubuntu-latest', 'macos-13']
python-version: ['3.8', '3.9', '3.10']
python-version: ['3.8', '3.10']

steps:

# Detect the number of CPU cores available on the current runner
- name: Get core number on linux
if: matrix.os == 'ubuntu-latest'
run: |
nb_cpu_linux=`nproc`
echo "Number of cores available on the current linux runner $nb_cpu_linux"
echo "NUM_CPUS=$nb_cpu_linux" >> "$GITHUB_ENV"

- name: Get core number on macos
if: matrix.os == 'macos-13'
run: |
nb_cpu_macos=`sysctl -n hw.ncpu`
echo "Number of cores available on the current macos runner $nb_cpu_macos"
echo "NUM_CPUS=$nb_cpu_macos" >> "$GITHUB_ENV"

# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@v4
# Install requirements with miniconda
Expand Down Expand Up @@ -52,7 +72,7 @@ jobs:
run: |
cd testingDataset
mkdir info_to_test
ppanggolin all --cpu 1 --fasta genomes.fasta.list --output mybasicpangenome
ppanggolin all --cpu $NUM_CPUS --fasta genomes.fasta.list --output mybasicpangenome
ppanggolin info --pangenome mybasicpangenome/pangenome.h5 --content --parameters --status > info_to_test/mybasicpangenome_info.yaml
cat info_to_test/mybasicpangenome_info.yaml
cd -
Expand All @@ -62,10 +82,10 @@ jobs:
shell: bash -l {0}
run: |
cd testingDataset
ppanggolin annotate --fasta genomes.fasta.list --output stepbystep --kingdom bacteria --cpu 1
ppanggolin cluster -p stepbystep/pangenome.h5 --coverage 0.8 --identity 0.8 --cpu 1
ppanggolin annotate --fasta genomes.fasta.list --output stepbystep --kingdom bacteria --cpu $NUM_CPUS
ppanggolin cluster -p stepbystep/pangenome.h5 --coverage 0.8 --identity 0.8 --cpu $NUM_CPUS
ppanggolin graph -p stepbystep/pangenome.h5 -r 10
ppanggolin partition --output stepbystep -f -p stepbystep/pangenome.h5 --cpu 1 -b 2.6 -ms 10 -fd -ck 500 -Kmm 3 12 -im 0.04 --draw_ICL
ppanggolin partition --output stepbystep -f -p stepbystep/pangenome.h5 --cpu $NUM_CPUS -b 2.6 -ms 10 -fd -ck 500 -Kmm 3 12 -im 0.04 --draw_ICL
ppanggolin rarefaction --output stepbystep -f -p stepbystep/pangenome.h5 --depth 5 --min 1 --max 50 -ms 10 -fd -ck 30 -K 3 --soft_core 0.9 -se $RANDOM
ppanggolin draw -p stepbystep/pangenome.h5 --tile_plot --nocloud --soft_core 0.92 --ucurve --output stepbystep -f
ppanggolin rgp -p stepbystep/pangenome.h5 --persistent_penalty 2 --variable_gain 1 --min_score 3 --dup_margin 0.05
Expand All @@ -90,19 +110,19 @@ jobs:
shell: bash -l {0}
run: |
cd testingDataset
ppanggolin workflow --cpu 1 --anno genomes.gbff.list --output myannopang
ppanggolin msa --pangenome myannopang/pangenome.h5 --source dna --partition core -o myannopang/ -f --use_gene_id --phylo --single_copy --cpu 1
ppanggolin workflow --cpu $NUM_CPUS --anno genomes.gbff.list --output myannopang
ppanggolin msa --pangenome myannopang/pangenome.h5 --source dna --partition core -o myannopang/ -f --use_gene_id --phylo --single_copy --cpu $NUM_CPUS
ppanggolin info --pangenome myannopang/pangenome.h5 > info_to_test/myannopang_info.yaml
cat info_to_test/myannopang_info.yaml
cd -
- name: clusters reading from external file
shell: bash -l {0}
run: |
cd testingDataset
ppanggolin panrgp --anno genomes.gbff.list --cluster clusters.tsv --output readclusterpang --cpu 1
ppanggolin annotate --anno genomes.gbff.list --output readclusters --cpu 1
ppanggolin cluster --clusters clusters.tsv -p readclusters/pangenome.h5 --cpu 1
ppanggolin msa --pangenome readclusterpang/pangenome.h5 --partition persistent --phylo -o readclusterpang/msa/ -f --cpu 1
ppanggolin panrgp --anno genomes.gbff.list --cluster clusters.tsv --output readclusterpang --cpu $NUM_CPUS
ppanggolin annotate --anno genomes.gbff.list --output readclusters --cpu $NUM_CPUS
ppanggolin cluster --clusters clusters.tsv -p readclusters/pangenome.h5 --cpu $NUM_CPUS
ppanggolin msa --pangenome readclusterpang/pangenome.h5 --partition persistent --phylo -o readclusterpang/msa/ -f --cpu $NUM_CPUS
cd -
- name: testing rgp_cluster command
shell: bash -l {0}
Expand All @@ -117,17 +137,17 @@ jobs:
run: |
cd testingDataset
ppanggolin align --pangenome mybasicpangenome/pangenome.h5 --sequences some_chlam_proteins.fasta \
--output test_align --draw_related --getinfo --fast --cpu 1
--output test_align --draw_related --getinfo --fast --cpu $NUM_CPUS
cd -
- name: testing context command
shell: bash -l {0}
run: |
cd testingDataset
ppanggolin context --pangenome myannopang/pangenome.h5 --sequences some_chlam_proteins.fasta --output test_context --fast --cpu 1
ppanggolin context --pangenome myannopang/pangenome.h5 --sequences some_chlam_proteins.fasta --output test_context --fast --cpu $NUM_CPUS

# Test from gene family IDs, using one family of module 1: the context should find all families of module 1.
echo AP288_RS05055 > one_family_of_module_1.txt
ppanggolin context --pangenome myannopang/pangenome.h5 --family one_family_of_module_1.txt --output test_context_from_id --cpu 1
ppanggolin context --pangenome myannopang/pangenome.h5 --family one_family_of_module_1.txt --output test_context_from_id --cpu $NUM_CPUS
cd -
- name: testing metadata command
shell: bash -l {0}
Expand All @@ -142,31 +162,31 @@ jobs:



ppanggolin write_pangenome -p mybasicpangenome/pangenome.h5 --output mybasicpangenome -f --gexf --light_gexf --cpu 1
ppanggolin write_pangenome -p mybasicpangenome/pangenome.h5 --output mybasicpangenome -f --gexf --light_gexf --cpu $NUM_CPUS
ppanggolin rgp_cluster --pangenome mybasicpangenome/pangenome.h5 -o rgp_cluster_with_metadata --graph_formats graphml
cd -
- name: testing config file
shell: bash -l {0}
run: |
cd testingDataset
ppanggolin utils --default_config panrgp -o panrgp_default_config.yaml
ppanggolin panrgp --anno genomes.gbff.list --cluster clusters.tsv -o test_config --config panrgp_default_config.yaml --cpu 1
ppanggolin panrgp --anno genomes.gbff.list --cluster clusters.tsv -o test_config --config panrgp_default_config.yaml --cpu $NUM_CPUS
cd -
- name: testing projection cmd
shell: bash -l {0}
run: |
cd testingDataset
head genomes.gbff.list | sed 's/^/input_genome_/g' > genomes.gbff.head.list
ppanggolin projection --pangenome stepbystep/pangenome.h5 -o projection_from_list_of_gbff --anno genomes.gbff.head.list --gff --proksee --cpu 1
ppanggolin projection --pangenome stepbystep/pangenome.h5 -o projection_from_list_of_gbff --anno genomes.gbff.head.list --gff --proksee --cpu $NUM_CPUS


ppanggolin projection --pangenome mybasicpangenome/pangenome.h5 -o projection_from_single_fasta \
--genome_name chlam_A --fasta FASTA/GCF_002776845.1_ASM277684v1_genomic.fna.gz \
--spot_graph --graph_formats graphml --fast --keep_tmp -f --add_sequences --gff --proksee --table --add_metadata --cpu 1
--spot_graph --graph_formats graphml --fast --keep_tmp -f --add_sequences --gff --proksee --table --add_metadata --cpu $NUM_CPUS

ppanggolin projection --pangenome mybasicpangenome/pangenome.h5 -o projection_from_gff_prodigal \
--genome_name chlam_annotated_with_prodigal --anno GBFF/GCF_003788785.1_ct114V1_genomic_prodigal_annotation.gff.gz \
--gff --table --cpu 1
--gff --table --cpu $NUM_CPUS

- name: testing write_genome_cmds
shell: bash -l {0}
Expand Down
4 changes: 2 additions & 2 deletions testingDataset/launch_test_locally.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def parse_arguments(default_ci_yaml, testing_datadir):

parser.add_argument('-o', '--outdir', help="increase output verbosity", default='local_CI', type=Path)

parser.add_argument('-c', '--cpu', type=int, default=1,
parser.add_argument('-c', '--cpu', type=int, default=4,
help="Use this amount of cpu when number of cpu is specified in the command.")

parser.add_argument("-v", "--verbose", help="increase output verbosity",
Expand Down Expand Up @@ -127,7 +127,7 @@ def main():
command = step['run'].strip()

# process the command
command = command.replace('--cpu 1', f"--cpu {args.cpu}")
command = command.replace('$NUM_CPUS', f"{args.cpu}")
command = command.replace('cd ', "# cd ")

if args.skip_msa:
Expand Down