-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathntroot
executable file
·178 lines (143 loc) · 6.82 KB
/
ntroot
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#!/usr/bin/env python3
'''
Driver script for ntRoot
'''
import argparse
import os
import shlex
import subprocess
from packaging import version
import snakemake
NTROOT_VERSION = "v1.1.6"
def set_up_parser():
"Set-up the ntRoot argparse parser"
parser = argparse.ArgumentParser(description="ntRoot: Ancestry inference from genomic data",
formatter_class=argparse.RawTextHelpFormatter,
epilog="Note: please specify --reads OR --genome (not both)\n"
"If you have any questions about ntRoot, please open an "
"issue at https://github.com/bcgsc/ntRoot")
parser.add_argument("--draft",
help=argparse.SUPPRESS)
parser.add_argument("-r", "--reference",
help="Reference genome (FASTA, Multi-FASTA, and/or gzipped compatible)")
parser.add_argument("--reads",
help="Prefix of input reads file(s) for detecting SNVs. "
"All files in the working directory with the specified prefix will be used. "
"(fastq, fasta, gz, bz, zip)", type=str)
parser.add_argument("--genome",
help="Genome assembly file(s) for detecting SNVs compared to --reference", nargs="+")
parser.add_argument("-l",
help="input VCF file with annotated variants (e.g., clinvar.vcf, 1000GP_integrated_snv_v2a_27022019.GRCh38.phased_gt1.vcf.gz)",
type=str, required=True)
parser.add_argument("-k",
help="k-mer size",
required=False, type=int)
parser.add_argument("--tile", help="Tile size for ancestry fraction inference (bp) [default=5000000]",
default=5000000, type=int)
parser.add_argument("--lai", help="Output ancestry predictons per tile in a separate output file",
action="store_true")
parser.add_argument("-t",
help="Number of threads [default=4]", default=4, type=int)
parser.add_argument("-z",
help="Minimum contig length [default=100]", default=100, type=int)
parser.add_argument("-j",
help="controls size of k-mer subset. When checking subset of k-mers, check every jth k-mer "
"[default=3]",
default=3, type=int)
parser.add_argument("-Y",
help="Ratio of number of k-mers in the k subset that should be present to accept "
"an edit (higher=stringent) "
"[default=0.55]", default=0.55, type=float)
parser.add_argument("--custom_vcf", help="Input VCF for computing ancestry. "
"When specified, ntRoot will skip the ntEdit step, and "
"predict ancestry from the provided VCF.",
type=str)
parser.add_argument("--strip_info", help="When using --custom_vcf, strip the existing INFO field from the input VCF.",
action="store_true")
parser.add_argument("-v", "--verbose",
help="Verbose mode [default=False]", action="store_true", default=False)
parser.add_argument("-V", "--version", action="version", version=NTROOT_VERSION)
parser.add_argument("-n", "--dry-run", help="Print out the commands that will be executed", action="store_true")
parser.add_argument("-f", "--force", help="Run all ntRoot steps, regardless of existing output files",
action="store_true")
return parser
def main():
"Run ntRoot"
parser = set_up_parser()
args = parser.parse_args()
if ((args.reads and args.genome) or (not args.reads and not args.genome)) and not args.custom_vcf:
raise argparse.ArgumentTypeError("Please specify --reads OR --genome")
if not args.draft and not args.reference:
raise argparse.ArgumentTypeError("Please specify the reference genome with --reference")
if args.draft:
args.reference = args.draft
if args.reads or args.genome:
if not args.k:
raise argparse.ArgumentTypeError("Please specify the k-mer size with -k")
base_dir = os.path.dirname(os.path.realpath(__file__))
intro_string = ["Running ntRoot...",
"Parameter settings:"]
if args.reads:
if args.lai:
smk_rule = "ntroot_reads_lai"
else:
smk_rule = "ntroot_reads"
if args.genome:
if args.lai:
smk_rule = "ntroot_genome_lai"
else:
smk_rule = "ntroot_genome"
if args.custom_vcf:
if args.lai:
smk_rule = "ntroot_input_vcf_lai"
else:
smk_rule = "ntroot_input_vcf"
command = f"snakemake -s {base_dir}/ntroot_run_pipeline.smk {smk_rule} --nolock -p --cores {args.t} " \
f"--config reference={args.reference} threads={args.t} " \
f"tile_size={args.tile} "
if args.genome:
intro_string.append(f"\t--genome {args.genome}")
command += f"genomes={args.genome}"
elif args.custom_vcf:
intro_string.append(f"\t--custom_vcf {args.custom_vcf}")
command += f" input_vcf={args.custom_vcf}"
if args.strip_info:
intro_string.append("\t--strip_info")
command += " strip_info=True"
else:
intro_string.append(f"\t--reads {args.reads}")
command += f"reads={args.reads}"
intro_string.extend([f"\t--reference {args.reference}",
f"\t-t {args.t}"])
if args.genome or args.reads:
intro_string.extend([f"\t-k {args.k}",
f"\t-z {args.z}",
f"\t-j {args.j}",
f"\t-Y {args.Y}"])
command += f" kmer={args.k} z_param={args.z} j_param={args.j} Y_param={args.Y}"
if args.lai:
intro_string.append("\t--lai")
if args.verbose:
intro_string.append("\t-v")
command += " verbose=1"
else:
command += " verbose=0"
if not os.path.isfile(args.l):
raise FileNotFoundError(f"VCF file {args.l} not found")
intro_string.append(f"\t-l {args.l}")
command += f" l_vcf={args.l}"
print("\n".join(intro_string), flush=True)
if version.parse(snakemake.__version__) >= version.parse("7.8.0"): # Keep behaviour consistent for smk versions
command += " --rerun-trigger mtime "
if args.dry_run:
command += " -n"
if args.force:
command += " -F"
print(f"Running {command}", flush=True)
command = shlex.split(command)
ret = subprocess.call(command)
if ret != 0:
raise subprocess.SubprocessError("ntRoot failed - check the logs for the error.")
print("Done ntRoot!")
if __name__ == "__main__":
main()