forked from shalgilab/DoGFinder
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCommon_DoGs_annotation
79 lines (59 loc) · 2.58 KB
/
Common_DoGs_annotation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python2
from __future__ import division
import sys
import os, time
import pybedtools
import pandas as pd
import numpy as np
import pysam
import argparse
# Command-line interface: the annotation files to combine (-comm), the
# output directory (-out) and an optional suffix for the output file name
# (-suff). All three options use argparse's default 'store' action.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-comm',
    dest='COMM_LIST',
    type=str,
    help='DoGs annotation list seperated by ,',
)
parser.add_argument(
    '-out',
    dest='OUT',
    type=str,
    help='Location of union DoGs annotation file ',
)
parser.add_argument(
    '-suff',
    dest='base_name',
    type=str,
    default='',
    help='suffix name to output file',
)
usr_input = parser.parse_args()

# Unpack the parsed options into the module-level names the script works with.
# -comm and -out have no default, so `path` and `path_out` are None when omitted.
path, path_out, out_name = (
    usr_input.COMM_LIST,
    usr_input.OUT,
    usr_input.base_name,
)
# Validate the command-line inputs before any annotation file is read.
# Every failure exits with a non-zero status so that a calling pipeline can
# detect that no output was produced.
if path is None:
    # Without -comm there is nothing to split; report it instead of failing
    # with an AttributeError on None.
    print("Error: No -comm argument")
    sys.exit(1)

# -comm is a comma-separated list of DoG annotation files.
dog_ann_names = path.split(",")
for ann_path in dog_ann_names:
    if not os.path.isfile(ann_path):
        # The message deliberately keeps two spaces after the colon
        # (historical output format of this script).
        print("No DoG annotation file in :  %s" % ann_path)
        sys.exit(1)

if path_out is None:
    print("Error: No -out argument")
    sys.exit(1)

# Drop a single trailing slash so the "%s/..." output path built later does
# not contain a doubled separator. endswith() is safe on an empty string,
# unlike indexing path_out[-1].
if path_out.endswith('/'):
    path_out = path_out[:-1]

# Compare by value: an identity test against a string literal is not reliable.
if out_name != '':
    out_name = '_' + out_name

print("\n-----------------------------------------------------Common_DoGs-------------------------------------------------------------------------")
print("Output will be at: %s" % (path_out + "/"))
## union on all annotation:
# Read every annotation file, sort it, and stack all records into one table.
# Columns are renamed to positional integers so that records from different
# files line up by column position rather than by inferred column names.
ann_frames = []
for ann_path in dog_ann_names:
    sorted_ann = pybedtools.BedTool(ann_path).sort()
    ann_frame = sorted_ann.to_dataframe()
    ann_frame.columns = range(ann_frame.shape[1])
    ann_frames.append(ann_frame)

# Concatenate once: growing a DataFrame inside the loop (starting from an
# empty one) recopies all accumulated rows on every iteration.
df_ann_annot = pd.concat(ann_frames)
trans_annot = pybedtools.BedTool.from_dataframe(df_ann_annot).sort()

# Merge overlapping records on the same strand (s=True), collecting the
# distinct values of column 4 for each merged region.
merge_annot = trans_annot.merge(c=4, o='distinct', s=True)
df_merge_annot = merge_annot.to_dataframe()

# NOTE(review): this naming assumes the merge output columns are
# chrom, start, end, strand, name. Whether `merge -s` emits a strand column
# (and where) depends on the installed bedtools version -- confirm.
df_merge_annot.columns = ['chrom', 'start', 'end', 'strand', 'name']
df_merge_annot['score'] = 0

# 'distinct' joins names with commas; use '&' as the separator instead.
# Done before the column reorder so the assignment targets the full frame,
# not a selection of it.
df_merge_annot['name'] = df_merge_annot['name'].str.replace(',', '&')

# Reorder to the six-column BED layout.
df_merge_annot = df_merge_annot[['chrom', 'start', 'end', 'name', 'score', 'strand']]
ann_merge = pybedtools.BedTool.from_dataframe(df_merge_annot)
##intersect on all ann:
# Starting from the first annotation file, keep only the records that have a
# same-strand overlap (s=True) in every other annotation file.
annotation_main = pybedtools.BedTool(dog_ann_names[0])
for other_path in dog_ann_names[1:]:
    ann_temp = pybedtools.BedTool(other_path).sort()
    # wa=True writes the original record from annotation_main; u=True writes
    # it once no matter how many records of the other file it overlaps, so
    # duplicates do not accumulate from one file to the next.
    annotation_main = annotation_main.intersect(ann_temp, s=True, wa=True, u=True)

# Report the merged (union) regions that overlap a record common to all
# files. u=True keeps each merged region to a single output line even when it
# spans several common records.
path_dog_annot_temp = "%s/common_dog_annotation%s.bed" % (path_out, out_name)
ann_merge.intersect(annotation_main, s=True, wa=True, u=True).saveas(path_dog_annot_temp)
print("Done, output file : %s" % (path_dog_annot_temp))