From ff96caf0d1fe4f416bd4d3e7743f93bd55a6ddc0 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 4 Feb 2025 17:46:25 +0100 Subject: [PATCH 1/7] added contigs as input --- q2_annotate/plugin_setup.py | 6 +++--- q2_annotate/prodigal/prodigal.py | 26 ++++++++++++++++---------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/q2_annotate/plugin_setup.py b/q2_annotate/plugin_setup.py index 93d9e216..b6f2347a 100644 --- a/q2_annotate/plugin_setup.py +++ b/q2_annotate/plugin_setup.py @@ -1208,10 +1208,10 @@ plugin.methods.register_function( function=q2_annotate.prodigal.predict_genes_prodigal, inputs={ - 'mags': FeatureData[MAG] | SampleData[MAGs] + 'sequences': FeatureData[MAG] | SampleData[MAGs] | SampleData[Contigs] }, input_descriptions={ - 'mags': 'MAGs for which one wishes to predict genes.' + 'sequences': 'MAGs or contigs for which one wishes to predict genes.' }, parameters={ "translation_table_number": Str % Choices([ @@ -1242,7 +1242,7 @@ 'proteins': "Fasta files (one per MAG) with the protein translation " "of the predicted genes." }, - name='Predict gene sequences from MAGs using Prodigal.', + name='Predict gene sequences from MAGs or contigs using Prodigal.', description="Prodigal (PROkaryotic DYnamic programming " "Gene-finding ALgorithm), a gene prediction algorithm " "designed for improved gene structure prediction, translation " diff --git a/q2_annotate/prodigal/prodigal.py b/q2_annotate/prodigal/prodigal.py index ce68fa65..a377acd1 100644 --- a/q2_annotate/prodigal/prodigal.py +++ b/q2_annotate/prodigal/prodigal.py @@ -10,14 +10,16 @@ from typing import Union from .._utils import run_command from q2_types.feature_data_mag import MAGSequencesDirFmt -from q2_types.per_sample_sequences import MultiMAGSequencesDirFmt +from q2_types.per_sample_sequences import MultiMAGSequencesDirFmt, ContigSequencesDirFmt from q2_types.genome_data import ( LociDirectoryFormat, GenesDirectoryFormat, ProteinsDirectoryFormat, ) def predict_genes_prodigal( - mags: Union[MAGSequencesDirFmt, MultiMAGSequencesDirFmt], + sequences: Union[ + MAGSequencesDirFmt, MultiMAGSequencesDirFmt, ContigSequencesDirFmt + ], translation_table_number: str = "11", ) -> (LociDirectoryFormat, GenesDirectoryFormat, ProteinsDirectoryFormat): @@ -33,7 +35,7 @@ def predict_genes_prodigal( "-f", "gff" ] - def _run_prodigal(path_to_input: str, mag_id: str, subdir: str = None): + def _run_prodigal(path_to_input: str, _id: str, subdir: str = None): # If subdirectory is not None, append a "/" s.t. the command # below is defined correctly. Otw subdir = "" subdir = subdir + "/" if subdir else "" @@ -42,18 +44,22 @@ def _run_prodigal(path_to_input: str, mag_id: str, subdir: str = None): cmd = cp.deepcopy(base_cmd) cmd.extend([ "-i", path_to_input, - "-o", os.path.join(loci.path, f"{subdir}{mag_id}.gff"), - "-a", os.path.join(proteins.path, f"{subdir}{mag_id}.fasta"), - "-d", os.path.join(genes.path, f"{subdir}{mag_id}.fasta") + "-o", os.path.join(loci.path, f"{subdir}{_id}.gff"), + "-a", os.path.join(proteins.path, f"{subdir}{_id}.fasta"), + "-d", os.path.join(genes.path, f"{subdir}{_id}.fasta") ]) run_command(cmd) - if isinstance(mags, MAGSequencesDirFmt): - for mag_id, mag_fp in mags.feature_dict().items(): + if isinstance(sequences, MAGSequencesDirFmt): + for _id, mag_fp in sequences.feature_dict().items(): + _run_prodigal(mag_fp, _id) + + elif isinstance(sequences, ContigSequencesDirFmt): + for mag_id, mag_fp in sequences.sample_dict().items(): _run_prodigal(mag_fp, mag_id) - elif isinstance(mags, MultiMAGSequencesDirFmt): - for sample_id, mags_dict in mags.sample_dict().items(): + elif isinstance(sequences, MultiMAGSequencesDirFmt): + for sample_id, mags_dict in sequences.sample_dict().items(): # Make sample_id folders in output locations for output_object in [loci, genes, proteins]: os.makedirs(os.path.join(output_object.path, sample_id)) From e9a8f4b08cb7286d3ffa7b5f3f630f93e99bf888 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Wed, 5 Feb 2025 14:26:57 +0100 Subject: [PATCH 2/7] fixed tests --- q2_annotate/prodigal/tests/test_prodigal.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/q2_annotate/prodigal/tests/test_prodigal.py b/q2_annotate/prodigal/tests/test_prodigal.py index 157053a1..4aefaaa7 100644 --- a/q2_annotate/prodigal/tests/test_prodigal.py +++ b/q2_annotate/prodigal/tests/test_prodigal.py @@ -24,7 +24,7 @@ def test_run_prodigal_feature_data_1_mag(self, subp_run): # Run prodigal with dummy data p = self.get_data_path("dir_with_1_mag") mags = MAGSequencesDirFmt(path=p, mode="r") - loci, genes, proteins = predict_genes_prodigal(mags=mags) + loci, genes, proteins = predict_genes_prodigal(sequences=mags) # Check that output is correct type self.assertIsInstance(loci, LociDirectoryFormat) @@ -57,7 +57,7 @@ def test_run_prodigal_feature_data_3_mag(self, subp_run): # Run prodigal with dummy data p = self.get_data_path("dir_with_3_mag") mags = MAGSequencesDirFmt(path=p, mode="r") - loci, genes, proteins = predict_genes_prodigal(mags=mags) + loci, genes, proteins = predict_genes_prodigal(sequences=mags) # Check that output is correct type self.assertIsInstance(loci, LociDirectoryFormat) @@ -90,7 +90,7 @@ def test_run_prodigal_feature_data_3_mag(self, subp_run): def test_run_prodigal_sample_data(self, subp_run): p = self.get_data_path("") mags = MultiMAGSequencesDirFmt(path=p, mode="r") - loci, genes, prot = predict_genes_prodigal(mags=mags) + loci, genes, prot = predict_genes_prodigal(sequences=mags) # Check that output is correct type self.assertIsInstance(loci, LociDirectoryFormat) From 094bb747145cdacfc5aa65d4fd4a349a9503a392 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Wed, 5 Feb 2025 17:06:08 +0100 Subject: [PATCH 3/7] added test --- .../sample1_contigs.fasta} | 0 ...311112c9-7f8b-460c-9cad-3864af3148c2.fasta | 0 ...11112c9-7f8b-460c-9cad-3864af3148c2.fasta} | 0 ...7123d05-b5ae-4a53-873b-727952881899.fasta} | 0 ...67392c6c-9f45-4c84-85f5-ae0bfc668892.fasta | 0 q2_annotate/prodigal/tests/test_prodigal.py | 25 ++++++++++++++++--- 6 files changed, 21 insertions(+), 4 deletions(-) rename q2_annotate/prodigal/tests/data/{dir_with_1_mag/311112c9-7f8b-460c-9cad-3864af3148c2.fasta => contigs/sample1_contigs.fasta} (100%) rename q2_annotate/prodigal/tests/data/{dir_with_3_mag => mags/dir_with_1_mag}/311112c9-7f8b-460c-9cad-3864af3148c2.fasta (100%) rename q2_annotate/prodigal/tests/data/{dir_with_3_mag/67123d05-b5ae-4a53-873b-727952881899.fasta => mags/dir_with_3_mag/311112c9-7f8b-460c-9cad-3864af3148c2.fasta} (100%) rename q2_annotate/prodigal/tests/data/{dir_with_3_mag/67392c6c-9f45-4c84-85f5-ae0bfc668892.fasta => mags/dir_with_3_mag/67123d05-b5ae-4a53-873b-727952881899.fasta} (100%) create mode 100644 q2_annotate/prodigal/tests/data/mags/dir_with_3_mag/67392c6c-9f45-4c84-85f5-ae0bfc668892.fasta diff --git a/q2_annotate/prodigal/tests/data/dir_with_1_mag/311112c9-7f8b-460c-9cad-3864af3148c2.fasta b/q2_annotate/prodigal/tests/data/contigs/sample1_contigs.fasta similarity index 100% rename from q2_annotate/prodigal/tests/data/dir_with_1_mag/311112c9-7f8b-460c-9cad-3864af3148c2.fasta rename to q2_annotate/prodigal/tests/data/contigs/sample1_contigs.fasta diff --git a/q2_annotate/prodigal/tests/data/dir_with_3_mag/311112c9-7f8b-460c-9cad-3864af3148c2.fasta b/q2_annotate/prodigal/tests/data/mags/dir_with_1_mag/311112c9-7f8b-460c-9cad-3864af3148c2.fasta similarity index 100% rename from q2_annotate/prodigal/tests/data/dir_with_3_mag/311112c9-7f8b-460c-9cad-3864af3148c2.fasta rename to q2_annotate/prodigal/tests/data/mags/dir_with_1_mag/311112c9-7f8b-460c-9cad-3864af3148c2.fasta diff --git a/q2_annotate/prodigal/tests/data/dir_with_3_mag/67123d05-b5ae-4a53-873b-727952881899.fasta b/q2_annotate/prodigal/tests/data/mags/dir_with_3_mag/311112c9-7f8b-460c-9cad-3864af3148c2.fasta similarity index 100% rename from q2_annotate/prodigal/tests/data/dir_with_3_mag/67123d05-b5ae-4a53-873b-727952881899.fasta rename to q2_annotate/prodigal/tests/data/mags/dir_with_3_mag/311112c9-7f8b-460c-9cad-3864af3148c2.fasta diff --git a/q2_annotate/prodigal/tests/data/dir_with_3_mag/67392c6c-9f45-4c84-85f5-ae0bfc668892.fasta b/q2_annotate/prodigal/tests/data/mags/dir_with_3_mag/67123d05-b5ae-4a53-873b-727952881899.fasta similarity index 100% rename from q2_annotate/prodigal/tests/data/dir_with_3_mag/67392c6c-9f45-4c84-85f5-ae0bfc668892.fasta rename to q2_annotate/prodigal/tests/data/mags/dir_with_3_mag/67123d05-b5ae-4a53-873b-727952881899.fasta diff --git a/q2_annotate/prodigal/tests/data/mags/dir_with_3_mag/67392c6c-9f45-4c84-85f5-ae0bfc668892.fasta b/q2_annotate/prodigal/tests/data/mags/dir_with_3_mag/67392c6c-9f45-4c84-85f5-ae0bfc668892.fasta new file mode 100644 index 00000000..e69de29b diff --git a/q2_annotate/prodigal/tests/test_prodigal.py b/q2_annotate/prodigal/tests/test_prodigal.py index 4aefaaa7..97c1ce4b 100644 --- a/q2_annotate/prodigal/tests/test_prodigal.py +++ b/q2_annotate/prodigal/tests/test_prodigal.py @@ -9,7 +9,7 @@ from q2_annotate.prodigal.prodigal import predict_genes_prodigal from qiime2.plugin.testing import TestPluginBase from q2_types.feature_data_mag import MAGSequencesDirFmt -from q2_types.per_sample_sequences import MultiMAGSequencesDirFmt +from q2_types.per_sample_sequences import MultiMAGSequencesDirFmt, ContigSequencesDirFmt from unittest.mock import patch, call from q2_types.genome_data import ( LociDirectoryFormat, GenesDirectoryFormat, ProteinsDirectoryFormat, @@ -22,7 +22,7 @@ class TestBUSCO(TestPluginBase): @patch("subprocess.run") def test_run_prodigal_feature_data_1_mag(self, subp_run): # Run prodigal with dummy data - p = self.get_data_path("dir_with_1_mag") + p = self.get_data_path("mags/dir_with_1_mag") mags = MAGSequencesDirFmt(path=p, mode="r") loci, genes, proteins = predict_genes_prodigal(sequences=mags) @@ -55,7 +55,7 @@ def test_run_prodigal_feature_data_1_mag(self, subp_run): @patch("subprocess.run") def test_run_prodigal_feature_data_3_mag(self, subp_run): # Run prodigal with dummy data - p = self.get_data_path("dir_with_3_mag") + p = self.get_data_path("mags/dir_with_3_mag") mags = MAGSequencesDirFmt(path=p, mode="r") loci, genes, proteins = predict_genes_prodigal(sequences=mags) @@ -88,7 +88,7 @@ def test_run_prodigal_feature_data_3_mag(self, subp_run): @patch("subprocess.run") def test_run_prodigal_sample_data(self, subp_run): - p = self.get_data_path("") + p = self.get_data_path("mags") mags = MultiMAGSequencesDirFmt(path=p, mode="r") loci, genes, prot = predict_genes_prodigal(sequences=mags) @@ -117,3 +117,20 @@ def test_run_prodigal_sample_data(self, subp_run): # Assert that patch was called 3 times subp_run.assert_has_calls(calls, any_order=True) + + + @patch("subprocess.run") + def test_run_prodigal_contigs(self, subp_run): + contigs = ContigSequencesDirFmt(self.get_data_path("contigs"), mode="r") + loci, genes, prot = predict_genes_prodigal(sequences=contigs) + + subp_run.assert_called_once_with([ + "prodigal", + "-g", "11", + "-f", "gff", + "-i", os.path.join(contigs.path, "sample1_contigs.fasta"), + "-o", os.path.join(loci.path, "sample1.gff"), + "-a", os.path.join(prot.path, "sample1.fasta"), + "-d", os.path.join(genes.path, "sample1.fasta")], + check=True + ) \ No newline at end of file From 1cee5fb8a9c536bfc650c3b5dee8b1f3b2242a55 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Wed, 5 Feb 2025 17:07:49 +0100 Subject: [PATCH 4/7] lint --- q2_annotate/prodigal/tests/test_prodigal.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/q2_annotate/prodigal/tests/test_prodigal.py b/q2_annotate/prodigal/tests/test_prodigal.py index 97c1ce4b..2ed0ff1e 100644 --- a/q2_annotate/prodigal/tests/test_prodigal.py +++ b/q2_annotate/prodigal/tests/test_prodigal.py @@ -118,7 +118,6 @@ def test_run_prodigal_sample_data(self, subp_run): # Assert that patch was called 3 times subp_run.assert_has_calls(calls, any_order=True) - @patch("subprocess.run") def test_run_prodigal_contigs(self, subp_run): contigs = ContigSequencesDirFmt(self.get_data_path("contigs"), mode="r") @@ -133,4 +132,4 @@ def test_run_prodigal_contigs(self, subp_run): "-a", os.path.join(prot.path, "sample1.fasta"), "-d", os.path.join(genes.path, "sample1.fasta")], check=True - ) \ No newline at end of file + ) From 49f15330a8b5f414133036be1f459021679abb85 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 6 Feb 2025 16:32:09 +0100 Subject: [PATCH 5/7] changes after review --- q2_annotate/plugin_setup.py | 14 +++++++------- q2_annotate/prodigal/prodigal.py | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/q2_annotate/plugin_setup.py b/q2_annotate/plugin_setup.py index b6f2347a..8924e2aa 100644 --- a/q2_annotate/plugin_setup.py +++ b/q2_annotate/plugin_setup.py @@ -1234,13 +1234,13 @@ ('proteins', GenomeData[Proteins]) ], output_descriptions={ - 'loci': "Gene coordinates files (one per MAG) listing the location of " - "each predicted gene as well as some additional scoring " - "information. ", - 'genes': "Fasta files (one per MAG) with the nucleotide sequences of " - "the predicted genes.", - 'proteins': "Fasta files (one per MAG) with the protein translation " - "of the predicted genes." + 'loci': "Gene coordinates files (one per MAG or contig) listing the " + "location of each predicted gene as well as some additional " + "scoring information. ", + 'genes': "Fasta files (one per MAG or contig) with the nucleotide " + "sequences of the predicted genes.", + 'proteins': "Fasta files (one per MAG or contig) with the protein " + "translation of the predicted genes." }, name='Predict gene sequences from MAGs or contigs using Prodigal.', description="Prodigal (PROkaryotic DYnamic programming " diff --git a/q2_annotate/prodigal/prodigal.py b/q2_annotate/prodigal/prodigal.py index a377acd1..7886440d 100644 --- a/q2_annotate/prodigal/prodigal.py +++ b/q2_annotate/prodigal/prodigal.py @@ -51,12 +51,12 @@ def _run_prodigal(path_to_input: str, _id: str, subdir: str = None): run_command(cmd) if isinstance(sequences, MAGSequencesDirFmt): - for _id, mag_fp in sequences.feature_dict().items(): - _run_prodigal(mag_fp, _id) + for mag_id, mag_fp in sequences.feature_dict().items(): + _run_prodigal(mag_fp, mag_id) elif isinstance(sequences, ContigSequencesDirFmt): - for mag_id, mag_fp in sequences.sample_dict().items(): - _run_prodigal(mag_fp, mag_id) + for sample_id, contigs_fp in sequences.sample_dict().items(): + _run_prodigal(contigs_fp, sample_id) elif isinstance(sequences, MultiMAGSequencesDirFmt): for sample_id, mags_dict in sequences.sample_dict().items(): From d8853be9473073d4461317af2e823fa02cabccc8 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 7 Feb 2025 13:29:53 +0100 Subject: [PATCH 6/7] changed contig to sample in plugin setup descriptions --- q2_annotate/plugin_setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/q2_annotate/plugin_setup.py b/q2_annotate/plugin_setup.py index 8924e2aa..265026ae 100644 --- a/q2_annotate/plugin_setup.py +++ b/q2_annotate/plugin_setup.py @@ -1234,12 +1234,12 @@ ('proteins', GenomeData[Proteins]) ], output_descriptions={ - 'loci': "Gene coordinates files (one per MAG or contig) listing the " + 'loci': "Gene coordinates files (one per MAG or sample) listing the " "location of each predicted gene as well as some additional " "scoring information. ", - 'genes': "Fasta files (one per MAG or contig) with the nucleotide " + 'genes': "Fasta files (one per MAG or sample) with the nucleotide " "sequences of the predicted genes.", - 'proteins': "Fasta files (one per MAG or contig) with the protein " + 'proteins': "Fasta files (one per MAG or sample) with the protein " "translation of the predicted genes." }, name='Predict gene sequences from MAGs or contigs using Prodigal.', From 4b367c72a4872a189bc4c04eb4b49fad75b7116b Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 7 Feb 2025 15:04:36 +0100 Subject: [PATCH 7/7] changed call assertation to solve macos Ci failure --- q2_annotate/eggnog/tests/test_dbs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/q2_annotate/eggnog/tests/test_dbs.py b/q2_annotate/eggnog/tests/test_dbs.py index 351ac6eb..55d3d1ae 100644 --- a/q2_annotate/eggnog/tests/test_dbs.py +++ b/q2_annotate/eggnog/tests/test_dbs.py @@ -220,7 +220,7 @@ def test_fetch_ncbi_taxonomy(self, mock_os_rm, mock_run, mock_md5): ] # Check that commands are ran as expected - mock_os_rm.assert_called_once_with(zip_path) + mock_os_rm.assert_any_call(zip_path) mock_run.assert_has_calls( expected_calls, any_order=False