Skip to content

Commit

Permalink
modified: setup.py
Browse files Browse the repository at this point in the history
	modified:   svdb/__main__.py
	modified:   svdb/merge_vcf_module.py
	modified:   svdb/merge_vcf_module_cython.py
  • Loading branch information
J35P312 committed Sep 6, 2017
1 parent d11f26e commit 1aae0b5
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 7 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

setup(
name = 'svdb',
version = '1.0.6',
version = '1.0.7',
ext_modules = ext_modules,
packages = ['svdb'],
install_requires = ['numpy', 'scikit-learn==0.15.2', 'scipy'],
Expand Down
3 changes: 2 additions & 1 deletion svdb/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from . import bed_annotation_module

def main():
version = "1.0.6"
version = "1.0.7"
parser = argparse.ArgumentParser("""SVDB-{}, use the build module to construct databases, use the query module to query the database usign vcf files, or use the hist module to generate histograms""".format(version),add_help=False)
parser.add_argument('--build' , help="create a db", required=False, action="store_true")
parser.add_argument('--hist' , help="generate histograms o the performance of a db", required=False, action="store_true")
Expand Down Expand Up @@ -127,6 +127,7 @@ def main():
elif args.merge:
parser = argparse.ArgumentParser("""SVDB-{}: vcf_merge module""".format(version))
parser.add_argument('--merge', help="merge structural variants", required=False, action="store_true")
parser.add_argument('--notag', help="Do not add the the VARID and set entries to the info field", required=False, action="store_true")
parser.add_argument('--vcf', nargs='*', type=str, help="input vcf files, all input vcf files will be merged into one. Use the --prioriy flag to prioritize the callers/vcf files",required=True)
parser.add_argument('--priority', type=str, help="prioritise the input files, using the following format --vcf caller1.vcf:2 caller2.vcf:1 --priority: 1,2")
parser.add_argument('--bnd_distance', type=int,default= 2000,help="the maximum distance between two similar precise breakpoints(default = 2000)")
Expand Down
7 changes: 5 additions & 2 deletions svdb/merge_vcf_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def print_header(vcf_list,vcf_dictionary,args,command_line):
for sample in vcf_columns[9:]:
sample_order[sample][vcf_dictionary[vcf]]=i
i += 1

elif "<ID=VARID," in line or "<ID=set," in line:
continue
elif line[0] == line[1] and "=" in line:
if("ID=" in line and not "##contig=<ID=" in line):
field=line.split("=")[2].split(",")[0]
Expand Down Expand Up @@ -90,7 +91,9 @@ def print_header(vcf_list,vcf_dictionary,args,command_line):
#print subheaders
for entry in sorted(subheader):
print(subheader[entry].strip())
print("##INFO=<ID=VARID,Number=1,Type=String,Description=\"The variant ID of merged samples\">")
if not args.notag:
print("##INFO=<ID=VARID,Number=1,Type=String,Description=\"The variant ID of merged samples\">")
print("##INFO=<ID=set,Number=1,Type=String,Description=\"Source VCF for the merged record in SVDB\">")
print("##svdbcmdline={}".format(" ".join(command_line)))
sample_print_order={}
if sample_ids:
Expand Down
39 changes: 36 additions & 3 deletions svdb/merge_vcf_module_cython.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,31 @@ def retrieve_key(line,key):
return(False)
return(item)

def determine_set_tag(priority_order, files):
    """Build the value written to the ``set`` INFO entry of a merged variant.

    Each key of ``priority_order`` that is also present in ``files`` is
    classified by field index 6 of its tab-separated record (the FILTER
    column of a VCF line): ``"PASS"`` and ``"."`` count as passing, any
    other value counts as filtered.

    Parameters
    ----------
    priority_order : sequence of str
        Keys to consider; their order decides the order of the parts in a
        mixed tag.
    files : mapping of str to str
        Maps a key to its tab-separated record. Keys missing from this
        mapping are ignored when building a mixed tag.

    Returns
    -------
    str
        ``"Intersection"`` when every key in ``priority_order`` has a passing
        record, ``"FilteredInAll"`` when every key has a filtered record,
        otherwise the present keys joined with ``"-"``, where filtered ones
        are prefixed with ``"filterIn"``.

    Raises
    ------
    IndexError
        If a consulted record has fewer than seven tab-separated fields.

    Notes
    -----
    An empty ``priority_order`` yields ``"Intersection"`` because both
    counters and the length are zero; this mirrors the original behaviour.
    """
    # Classify every present record exactly once. Only field 6 is needed, so
    # the split stops after the seventh separator.
    statuses = []
    for sample in priority_order:
        if sample not in files:
            continue
        filter_field = files[sample].split("\t", 7)[6]
        statuses.append((sample, filter_field in ("PASS", ".")))

    n_pass = sum(1 for _, passed in statuses if passed)
    n_filtered = len(statuses) - n_pass

    # Compare against len(priority_order), not len(statuses): a key without a
    # record prevents both the "Intersection" and the "FilteredInAll" verdict.
    if n_pass == len(priority_order):
        return "Intersection"
    if n_filtered == len(priority_order):
        return "FilteredInAll"

    return "-".join(
        sample if passed else "filterIn" + sample
        for sample, passed in statuses
    )

def get_CIPOS_CEND(query_variant):
ciA_query=[0,0]
CIPOS=retrieve_key(query_variant[-1],"CIPOS")
Expand Down Expand Up @@ -105,7 +130,7 @@ def sort_format_field(line,samples,sample_order,sample_print_order,priority_orde
format_string.append(",".join(sub_entry))
else:
if entry == "GT":
format_string.append("0/0")
format_string.append("./.")
else:
sub_entry=[]
for i in range(0,format_entry_length[j]+1):
Expand Down Expand Up @@ -229,8 +254,16 @@ def merge(variants,samples,sample_order,sample_print_order,priority_order,args):
representing_file = variants[chrA][i][-3].replace(".vcf","").split("/")[-1]

line=sort_format_field(line,samples,sample_order,sample_print_order,priority_order,files, representing_file,args)
if merge:
line[7] += ";VARID=" + "|".join(merge)
if merge and not args.notag:
line[7] += ";VARID=" + "|".join(merge)
#print "printing"
#print samples
#print priority_order
#print files
#print representing_file
if not args.notag:
set_tag=determine_set_tag(priority_order,files)
line[7] += ";set={}".format(set_tag);
to_be_printed[line[0]].append(line)

analysed_variants.add(i)
Expand Down

0 comments on commit 1aae0b5

Please sign in to comment.