-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathautoDS.py
executable file
·104 lines (81 loc) · 4.56 KB
/
autoDS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/python
"Automatically creates a PDB:DNA data set."
from optparse import OptionParser, OptionGroup
from data import *
#Xiaqng's change
##import optparse
#-------------------------------------------------------------------------------
# Command line options
#-------------------------------------------------------------------------------
# Every option is registered BEFORE the command line is parsed. optparse aborts
# with "no such option" when it meets a flag that has not been registered yet,
# so parsing between the mandatory group and the -i/-m/-o options would make
# those three options unusable from the command line.
optpars = OptionParser(usage="usage: %prog [options] filename",
                       version="%prog 0.1")

# Machine-specific base directory used to build the default paths below.
# Alternative locations are kept as comments for reference.
office_Mac_address = '/Users/xji3/clemensCode'
#home_pc_address='G:/Dropbox/My Files/BRC/Small Project/Clemens PY code'
#laptop_address='E:/Dropbox/Dropbox/My Files/BRC/Small Project/Clemens PY code'
address = office_Mac_address

# Output sub-folders (relative to `address`); OutputFolder selects the one
# used for the default --dsdir and --pisadir values.
CnFolder = '/CnDataOutput'
CaFolder = '/CaOutput'
OutputFolder = CaFolder

# --- Mandatory options (all of them carry a non-empty default) ----------------
group = OptionGroup(optpars, "Mandatory options")
group.add_option("-d", "--dsdir", action="store", dest="ds_dir",
                 default=(address + OutputFolder),
                 help='Mandatory: data-set output directory')
group.add_option("-f", "--file", action="store", dest="xmlfilename",
                 default=(address + '/input/pdb_search.xml'),
                 help='Mandatory: XML file for PDB query')
group.add_option("-p", "--pisadir", action="store", dest="local_pisadir",
                 default=(address + OutputFolder + '/pisa'),
                 help='Mandatory: local dir where Pisa files should be stored')
# Choose between C-N or Ca-Ca
group.add_option("-c", "--criterion", action="store", dest="DistCriterion",
                 default=('Ca-Ca'),
                 help='Mandatory: distance criterion')
optpars.add_option_group(group)

# --- Optional inputs that let individual pipeline steps be skipped ------------
optpars.add_option("-i", "--initpdb", action="store", dest="initpdb_filename",
                   default=(address + '/input/init_pdbids.txt'),
                   help='Skip PDB search, read PDB-IDs from this (single-column) file instead')
optpars.add_option("-m", "--initmp", action="store", dest="initmp_filename",
                   default=(address + '/input/init_mpids.txt'),
                   help='Skip mpstruc search, read PDB-IDs from this (single-column) file instead')
optpars.add_option("-o", "--initmono", action="store", dest="initmono_filename",
                   default=(address + '/input/monomers.txt'),
                   help='Skip Pisa step, read monomer PDB-IDs from this (single-column) file instead')

# Parse the command line exactly once, now that every option is known.
(options, args) = optpars.parse_args()
# `opts` used to hold the result of an earlier, partial parse; keep the name
# available as an alias of the complete result.
opts = options

# Because each mandatory option has a default, this check only fires when one
# of them is explicitly given as an empty string.
mandatories = ['ds_dir', 'xmlfilename', 'local_pisadir']
for m in mandatories:
    if not getattr(options, m):
        # Parenthesised single-argument print emits the same text on
        # Python 2 and Python 3.
        print("mandatory option is missing\n")
        optpars.print_help()
        # Same exit status as exit(-1), without relying on the site module.
        raise SystemExit(-1)
#-------------------------------------------------------------------------------
if __name__ == '__main__':
    # Refuse to run when any required setting is empty: show the help text,
    # then let optparse report the error and terminate the process.
    for required in ('xmlfilename', 'local_pisadir', 'ds_dir'):
        if not getattr(options, required):
            optpars.print_help()
            optpars.error('At least one mandatory option is missing.\n')

    #---------------------------------------------------------------------------
    # Initialize Data object
    #---------------------------------------------------------------------------
    # The output directory is handed to Data with a trailing slash.
    if not options.ds_dir.endswith('/'):
        options.ds_dir += '/'
    data = Data(options.initpdb_filename,
                options.xmlfilename,
                options.ds_dir,
                options.DistCriterion)

    #---------------------------------------------------------------------------
    # Apply additional filters (no membrane proteins, only monomers, ...)
    #---------------------------------------------------------------------------
    data.filterMembraneProteins(options.initmp_filename)
    data.filterNonMonomers(options.initmono_filename, options.local_pisadir)

    #---------------------------------------------------------------------------
    # Get the AA sequences for the PDB files and remove those with non-unique
    # chains
    #---------------------------------------------------------------------------
    data.fasta4pdb()
    data.removeNonUniqueChainPDBs()

    #---------------------------------------------------------------------------
    # Map PDB-IDs to CCDS-IDs (and keep track of all other mappings)
    #---------------------------------------------------------------------------
    data.mapPDB2CCDS()
    # NOTE(review): the meaning of the two thresholds (50, 97.0) is defined by
    # Data.filterByCCDSalignments, which is not visible here -- confirm there.
    data.filterByCCDSalignments(50, 97.0)

    #---------------------------------------------------------------------------
    # Print results to files
    #---------------------------------------------------------------------------
    data.printResults()
#-------------------------------------------------------------------------------