Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fixes Bug in K22F that is caused by infra-clades as terminal taxons #237

Merged
merged 2 commits into from
Feb 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions q2_annotate/kraken2/select.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,14 @@ def _combine_ncbi_trees(trees):
continue # for clarity
else:
parents = list(tip.ancestors())[:-1] # ignore unnamed root

# Check whether the tip is an infra-clade (infra-clades have length 0).
# If it is, add the tip itself to the front of the ancestor list.
# This mimics what happens when the node isn't an infra-clade,
# i.e. the node has an id_node, and the node itself gets added to the
# list of ancestors when you call .parent on an id_node.
if tip.length == 0:
parents.insert(0, tip)
matching = full_tree
subtree_inserted = False
while parents and not subtree_inserted:
Expand All @@ -243,6 +251,12 @@ def _combine_ncbi_trees(trees):
break
if not ancestor_found:
matching.append(node)
# This may be overkill, but this checks to make sure
# that the tip is an infra-clade (tip.length == 0)
# and doesn't have children. If both of these are
# true, then add this tip to the tip cache.
if len(tip.children) == 0 and tip.length == 0:
tip_cache[tip.name] = tip
for t in node.tips():
tip_cache[t.name] = t
assert tip.name in tip_cache
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
100 9332144 0 R 1 root
96.08 8966446 0 R1 131567 cellular organisms
5.3 494861 0 D 2759 Eukaryota
4.81 448908 0 D1 33154 Opisthokonta
4.54 423718 0 K 33208 Metazoa
4.53 422551 0 K1 6072 Eumetazoa
4.5 419698 0 K2 33213 Bilateria
3.42 318923 0 K3 33317 Protostomia
3.1 289656 0 K4 1206794 Ecdysozoa
3.07 286854 0 K5 88770 Panarthropoda
3.07 286847 0 P 6656 Arthropoda
3.01 281163 0 P1 197563 Mandibulata
3.01 281054 0 P2 197562 Pancrustacea
2.94 274151 0 P3 6960 Hexapoda
2.93 273688 0 C 50557 Insecta
2.93 273688 0 C1 85512 Dicondylia
2.93 273688 0 C2 7496 Pterygota
2.92 272146 0 C3 33340 Neoptera
0.06 5786 0 C4 33342 Paraneoptera
0.06 5578 0 O 7524 Hemiptera
0.02 2099 0 O1 33343 Prosorrhyncha
0.02 2099 0 O2 33345 Heteroptera
0.02 2099 0 O3 33347 Euheteroptera
0.02 2099 0 O4 33349 Neoheteroptera
0.02 1673 0 O5 33351 Panheteroptera
0.01 962 0 O6 33354 Cimicomorpha
0.01 935 0 O7 33355 Cimicoidea
0.01 927 0 F 30083 Miridae
0 228 0 F1 236635 Phylinae
0 228 0 F2 236648 Pilophorini
0 228 0 G 237084 Pilophorus
0.27 25092 0 K 4751 Fungi
0.24 22558 0 K1 451864 Dikarya
0.22 20530 0 P 4890 Ascomycota
0.22 20419 0 P1 716545 saccharomyceta
0.2 18909 0 P2 147538 Pezizomycotina
0.2 18778 0 P3 716546 leotiomyceta
0 58 0 C 147547 Lecanoromycetes
0 58 0 C1 1520881 OSLEUM clade
0 58 0 C2 388435 Lecanoromycetidae
0 58 0 O 5197 Lecanorales
0 58 0 O1 157822 Lecanorineae
0 53 0 F 5198 Cladoniaceae
0 53 0 G 5199 Cladonia
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
100 9332144 0 R 1 root
96.08 8966446 0 R1 131567 cellular organisms
5.3 494861 0 D 2759 Eukaryota
4.81 448908 0 D1 33154 Opisthokonta
4.54 423718 0 K 33208 Metazoa
4.53 422551 0 K1 6072 Eumetazoa
4.5 419698 0 K2 33213 Bilateria
3.42 318923 0 K3 33317 Protostomia
3.1 289656 0 K4 1206794 Ecdysozoa
3.07 286854 0 K5 88770 Panarthropoda
3.07 286847 0 P 6656 Arthropoda
3.01 281163 0 P1 197563 Mandibulata
3.01 281054 0 P2 197562 Pancrustacea
2.94 274151 0 P3 6960 Hexapoda
2.93 273688 0 C 50557 Insecta
2.93 273688 0 C1 85512 Dicondylia
2.93 273688 0 C2 7496 Pterygota
2.92 272146 0 C3 33340 Neoptera
0.06 5786 0 C4 33342 Paraneoptera
0.06 5578 0 O 7524 Hemiptera
0.02 2099 0 O1 33343 Prosorrhyncha
0.02 2099 0 O2 33345 Heteroptera
0.02 2099 0 O3 33347 Euheteroptera
0.02 2099 0 O4 33349 Neoheteroptera
0.02 1673 0 O5 33351 Panheteroptera
0.01 962 0 O6 33354 Cimicomorpha
0.01 935 0 O7 33355 Cimicoidea
0.01 927 0 F 30083 Miridae
0 228 0 F1 236635 Phylinae
0 228 0 F2 236648 Pilophorini
0 228 0 G 237084 Pilophorus
0.27 25092 0 K 4751 Fungi
0.24 22558 0 K1 451864 Dikarya
0.22 20530 0 P 4890 Ascomycota
0.22 20419 0 P1 716545 saccharomyceta
0.2 18909 0 P2 147538 Pezizomycotina
0.2 18778 0 P3 716546 leotiomyceta
0 58 0 C 147547 Lecanoromycetes
0 58 0 C1 1520881 OSLEUM clade
0 58 0 C2 388435 Lecanoromycetidae
0 58 0 O 5197 Lecanorales
0 58 0 O1 157822 Lecanorineae
0 53 0 F 5198 Cladoniaceae
0 53 0 G 5199 Cladonia
0 53 0 G1 51991 Cladonia 2
12 changes: 12 additions & 0 deletions q2_annotate/kraken2/tests/test_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,18 @@ def setUp(self):
def tearDown(self):
    """Delete the directory tree referenced by ``self.temp_dir``."""
    scratch_dir = self.temp_dir
    shutil.rmtree(scratch_dir)

def test_kraken2_to_features_infra_clade(self):
    """Reports ending in an infra-clade still yield their tip taxa.

    Loads the ``infra-clade/`` fixture directory, converts it with
    ``kraken2_to_features`` at a coverage threshold of 0.0, and checks
    that the expected taxon ids appear in both the taxonomy index and
    the feature table columns.
    """
    report_dir = self.get_data_path("infra-clade/")
    reports = Kraken2ReportDirectoryFormat(report_dir, "r")
    obs_table, obs_taxonomy = kraken2_to_features(
        reports, coverage_threshold=0.0
    )
    # Taxon ids asserted to be present in both outputs.
    for taxon_id in ('237084', '5199'):
        assert taxon_id in obs_taxonomy.index
        assert taxon_id in obs_table.columns

def test_kraken2_to_features_duplicated_genus(self):
reports = Kraken2ReportDirectoryFormat(
self.get_data_path("duplicated-genus/"), "r"
Expand Down
Loading