-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathalphafold-analyser.py
executable file
·240 lines (190 loc) · 6.87 KB
/
alphafold-analyser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#!/usr/local/bin/python3.7
# Import all relevant libraries
import argparse
import os
import pickle
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
# unpickle file and return dictionary
def depickler(pickle_input):
try:
with open(pickle_input, "rb") as f:
return pickle.load(f)
except EOFError:
print(
" Error: Data could not be found, predicted aligned error plotting failed\n"
)
except FileNotFoundError:
print(
" Error: File could not be found, predicted aligned error plotting failed\n"
)
# Plot plDDT
def plot_pLDDT(data, output):
fig, ax = plt.subplots(figsize=(20, 5))
plddt = data["plddt"]
position = [n + 1 for n, ele in enumerate(data["plddt"])]
# Confidence boundaries
ax.add_patch(Rectangle((0, 90), 800, 10, color="#024fcc"))
ax.add_patch(Rectangle((0, 70), 800, 20, color="#60c2e8"))
ax.add_patch(Rectangle((0, 50), 800, 20, color="#f37842"))
ax.add_patch(Rectangle((0, 0), 800, 50, color="#f9d613"))
# Plot
ax.plot(
position,
plddt,
color="black",
linewidth=2,
linestyle="-"
)
# Plot parameters
ax.set_ylim(0, 100)
ax.set_xlim(1, max(position) + 10)
ax.spines[["right", "top"]].set_visible(False)
ax.set_yticks([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
plddt_legend = {
"Very high (pLDDT > 90)": "#024fcc",
"High (90 > pLDDT > 70)": "#60c2e8",
"Low (70 > pLDDT > 50)": "#f37842",
"Very low (pLDDT < 50)": "#f9d613",
}
ax.legend(plddt_legend, title="pLDDT Confidence", prop={'size': 16}, bbox_to_anchor=(-0.15, 1))
# labels
ax.set_xlabel("Amino Acid Position")
ax.set_ylabel("pLDDT")
plt.savefig(f"{output}/plddt.png", dpi=1000, bbox_inches="tight")
# Plot Predicted Aligned Error
def plot_PAE(data, output):
fig, ax = plt.subplots(figsize=(6, 6))
ax.set_facecolor("white") # color background white
cmap = ax.imshow(
data["predicted_aligned_error"],
vmin=0,
vmax=data["max_predicted_aligned_error"],
cmap="Greens_r",
) # plot pae
ax.set_xlabel("Scored residue") # plot x-axis label
ax.set_ylabel("Aligned residue") # plot y-axis label
axins = inset_axes(ax, width="100%", height="5%", loc="lower center", borderpad=-7)
cbar = fig.colorbar(cmap, cax=axins, orientation="horizontal") # create color bar
cbar.set_label("Expected position error (Ångströms)")
plt.savefig(f"{output}/PAE.png", dpi=1000, bbox_inches="tight")
def make_plots(pickle, output):
data = depickler(pickle_input=pickle)
plot_PAE(data, output)
plot_pLDDT(data, output)
# Create a PyMOL session from the pdb file generated by AlphaFold
def protein_painter(pdb_input, output, pymol_binary):
# File path for the PyMol session
session_path = f"{output}/pLDDT.pse"
color_plddt = (
" color 0x024fcc, b < 100; color 0x60c2e8, b < 90; color 0xf37842, b < 70; color 0xf9d613, b < 50"
)
# Terminal Command to open pdb file, color protein by pLDDT (b-factor) and save the session in the output directory
pymol_command = f'{pymol_binary} -cq {str(pdb_input)} -d "{color_plddt}; save {session_path}"'
# Run terminal command
os.system(pymol_command)
if os.path.isfile(session_path):
print("\n pLDDT data visualised\n")
else:
print("\n Error: visualisation failed\n")
# Checks if a file exists
def is_valid_file(parser, arg):
if not os.path.exists(arg):
parser.error(f"Input file ({arg}) not found!")
else:
return arg
# Generate CLI and define arguments with Argparse
def cmd_lineparser():
parser = argparse.ArgumentParser(
prog="AlphaFold Analyser",
add_help=False,
formatter_class=argparse.RawTextHelpFormatter,
)
group_inputs = parser.add_argument_group("Inputs")
# Get pdb structure path
group_inputs.add_argument(
"-p",
"--pdb",
metavar="\b",
type=lambda x: is_valid_file(parser, x),
action="store",
help="path to pdb file - generates pLDDT coloured structure",
default=None
)
# Get pkl file path
group_inputs.add_argument(
"-l",
"--pkl",
metavar="\b",
type=lambda x: is_valid_file(parser, x),
action="store",
help="path to pkl file - generates predicted aligned error plot",
default=None,
)
# Get pkl file path
group_inputs.add_argument(
"-b",
"--binary",
metavar="\b",
type=str,
action="store",
help="path to pymol binary - Only required for analysing pdb (-p)",
default=None,
)
group_output = parser.add_argument_group("Outputs")
# Get output directory
group_output.add_argument(
"-o",
"--output",
metavar="\b",
type=str,
action="store",
help="directory to store all generated outputs",
default=None,
)
group_options = parser.add_argument_group("Options")
# Get Version
group_options.add_argument(
"-v", "--version", action="version", version="%(prog)s v1.0"
)
# Get help
group_options.add_argument(
"-h", "--help", action="help", help="show this help message and exit\n "
)
# Parse arguments
args = parser.parse_args()
input_list = [args.pkl, args.pdb, args.output]
# If all arguments are None display help text by parsing help
if input_list.count(input_list[0]) == len(input_list):
parser.parse_args(["-h"])
# Check output directory exists
if not os.path.isdir(args.output):
parser.error("ERROR: Output directory not found")
#Check binary provided if pymol file is provided
if args.pdb is not None and args.binary is None:
parser.error("ERROR: pymol binary required to analyse protein structure")
return args
# Perform analysis of alphafold results
def main():
args = cmd_lineparser()
# if pdb structure provided and generates PyMol session with pLDDT coloured
if args.pdb is not None:
print("\n Visualising pLDDT data...\n")
protein_painter(args.pdb, args.output, args.binary)
# if no pdb structure provided skips process
elif args.pdb is None:
print("\n no pdb file provided, skipping pLDDT data visualisation...\n")
# if pkl structure provided, generate predicted aligned error plot
if args.pkl is not None:
print(" plotting predicted aligned error and plddt...")
make_plots(args.pkl, args.output)
# if no pkl file provided skips process
elif args.pkl is None:
print(
" no pickle file provided, skipping predicted aligned error visualisation...\n"
)
print(" all processes finished, shutting down...\n")
# Run analysis
if __name__ == "__main__":
main()