diff --git a/docs/src/reference/change_log.md b/docs/src/reference/change_log.md index c7d60f193..bf16dcd6e 100644 --- a/docs/src/reference/change_log.md +++ b/docs/src/reference/change_log.md @@ -5,6 +5,10 @@ We also use this change log to document new features that maintain backward comp ## New features since last version update +## v13 (16 May 2023) + +- 16 May 2023: Update workflow to support [Augur v22](https://github.com/nextstrain/augur/releases/tag/22.0.0) which updates the `augur clades` interface and structure of the output files to allow specifying the clade label & coloring keys. Because we use custom scripts to parse these files, this workflow also needed updating. This change results in a simplification of the nCoV pipeline (PR [1000](https://github.com/nextstrain/ncov/pull/1000)). + - 11 April 2023: Elevate XBB.1.16 as new clade 23B. See [PR 1059](https://github.com/nextstrain/ncov/pull/1059) for the rationale behind this clade update. - 6 April 2023: Update conda environment dependencies: augur 19.2.0 -> 21.1.0, nextalign/nextclade 2.9.1 -> 2.13.1, iqtree 2.2.0_beta -> 2.2.0.3. 
[PR 1056](https://github.com/nextstrain/ncov/pull/1056) diff --git a/scripts/add_branch_labels.py b/scripts/add_branch_labels.py deleted file mode 100644 index 613d28df3..000000000 --- a/scripts/add_branch_labels.py +++ /dev/null @@ -1,63 +0,0 @@ -import argparse -import json -from Bio import Phylo -from collections import defaultdict - -def extract_spike_mutations(node_data): - data = {} - for name, node in node_data["nodes"].items(): - smuts = node.get("aa_muts", {}).get("S", []) - if smuts: - data[name] = ", ".join(smuts) - return data - -def extract_clade_labels(node_data): - data = {} - for name, node in node_data["nodes"].items(): - if "clade_annotation" in node: - data[name] = node["clade_annotation"] - return data - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description="Remove extraneous colorings", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - - parser.add_argument('--input', type=str, metavar="JSON", required=True, help="input Auspice JSON") - parser.add_argument('--mutations', type=str, required=False, help="mutations node data file") - parser.add_argument('--emerging-clades', type=str, required=True, help="emerging clades node data file") - parser.add_argument('--output', type=str, metavar="JSON", required=True, help="output Auspice JSON") - args = parser.parse_args() - - with open(args.input, "r") as f: - auspice_json = json.load(f) - - if args.mutations: - with open(args.mutations, "r") as f: - spike_mutations = extract_spike_mutations(json.load(f)) - else: - spike_mutations = {} - - with open(args.emerging_clades, "r") as f: - clade_labels = extract_clade_labels(json.load(f)) - - def attach_labels(n): # closure - if n["name"] in spike_mutations or n["name"] in clade_labels: - if "branch_attrs" not in n: - n["branch_attrs"]={} - if "labels" not in n["branch_attrs"]: - n["branch_attrs"]["labels"]={} - if n["name"] in spike_mutations: - n["branch_attrs"]["labels"]["spike_mutations"] = spike_mutations[n["name"]] - 
if n["name"] in clade_labels: - n["branch_attrs"]["labels"]["emerging_lineage"] = clade_labels[n["name"]] - - if "children" in n: - for c in n["children"]: - attach_labels(c) - - attach_labels(auspice_json["tree"]) - - with open(args.output, 'w') as f: - json.dump(auspice_json, f, indent=2) diff --git a/workflow/envs/nextstrain.yaml b/workflow/envs/nextstrain.yaml index 00cfbb3af..c2b1ce23a 100644 --- a/workflow/envs/nextstrain.yaml +++ b/workflow/envs/nextstrain.yaml @@ -4,7 +4,7 @@ channels: - bioconda - defaults dependencies: - - augur=21.1.0 + - augur=22.0.1 - epiweeks=2.1.2 - iqtree=2.2.0.3 - nextalign=2.13.1 diff --git a/workflow/snakemake_rules/main_workflow.smk b/workflow/snakemake_rules/main_workflow.smk index 7dfa61b35..70177bd61 100644 --- a/workflow/snakemake_rules/main_workflow.smk +++ b/workflow/snakemake_rules/main_workflow.smk @@ -1075,7 +1075,7 @@ rule emerging_lineages: emerging_lineages = config["files"]["emerging_lineages"], clades = config["files"]["clades"] output: - clade_data = "results/{build_name}/temp_emerging_lineages.json" + clade_data = "results/{build_name}/emerging_lineages.json" log: "logs/emerging_lineages_{build_name}.txt" benchmark: @@ -1089,27 +1089,11 @@ rule emerging_lineages: augur clades --tree {input.tree} \ --mutations {input.nuc_muts} {input.aa_muts} \ --clades {input.emerging_lineages} \ + --membership-name emerging_lineage \ + --label-name emerging_lineage \ --output-node-data {output.clade_data} 2>&1 | tee {log} """ -rule rename_emerging_lineages: - input: - node_data = rules.emerging_lineages.output.clade_data - output: - clade_data = "results/{build_name}/emerging_lineages.json" - benchmark: - "benchmarks/rename_emerging_lineages_{build_name}.txt" - run: - import json - with open(input.node_data, 'r', encoding='utf-8') as fh: - d = json.load(fh) - new_data = {} - for k,v in d['nodes'].items(): - if "clade_membership" in v: - new_data[k] = {"emerging_lineage": v["clade_membership"]} - with open(output.clade_data, "w") 
as fh: - json.dump({"nodes": new_data}, fh, indent=2) - rule colors: message: "Constructing colors file" input: @@ -1366,8 +1350,8 @@ def _get_node_data_by_wildcards(wildcards): rules.refine.output.node_data, rules.ancestral.output.node_data, rules.translate.output.node_data, - rules.rename_emerging_lineages.output.clade_data, rules.clades.output.clade_data, + rules.emerging_lineages.output.clade_data, rules.recency.output.node_data, rules.traits.output.node_data, rules.logistic_growth.output.node_data, @@ -1462,28 +1446,10 @@ rule export: --output {output.auspice_json} 2>&1 | tee {log} """ -rule add_branch_labels: - message: "Adding custom branch labels to the Auspice JSON" - input: - auspice_json = rules.export.output.auspice_json, - emerging_clades = rules.emerging_lineages.output.clade_data - output: - auspice_json = "results/{build_name}/ncov_with_branch_labels.json" - log: - "logs/add_branch_labels{build_name}.txt" - conda: config["conda_environment"] - shell: - """ - python3 ./scripts/add_branch_labels.py \ - --input {input.auspice_json} \ - --emerging-clades {input.emerging_clades} \ - --output {output.auspice_json} - """ - rule include_hcov19_prefix: message: "Rename strains to include hCoV-19/ prefix" input: - auspice_json = rules.add_branch_labels.output.auspice_json, + auspice_json = rules.export.output.auspice_json, tip_frequencies = rules.tip_frequencies.output.tip_frequencies_json output: auspice_json = "results/{build_name}/ncov_with_hcov19_prefix.json",