-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdistogram.py
117 lines (85 loc) · 3.34 KB
/
distogram.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""
This script computes a distogramm for an MD trajectory
with weights.
"""
import numpy as np
import scipy.spatial
import mdtraj as md
import numba as nb
import matplotlib.pyplot as plt
import tqdm # not necessary, it's jsut for progress visualization of a for loop
@nb.jit
def increment_counts(counts, dist_bin_int, weight):
n_ca, _ = dist_bin_int.shape
for i in range(n_ca):
for j in range(n_ca):
b = dist_bin_int[i, j]
counts[i, j, b] += weight
return counts
########## input params and input data paths ###################
topPath = "topology.pdb"
trajPath = "trajectory.dcd"
weightPath = 'conformer_weights.dat'
n_dist = 32 # number of distance bins
range_dist = 0, 150 # distance range
#################################################################
# load input data
weights = np.loadtxt(weightPath)[:, 1]
t = md.load(trajPath, top=topPath)
n_frames = len(t)
top = t.top
ca_idx = top.select("protein and name CA")
n_ca = len(ca_idx)
axis_dist = np.linspace(*range_dist, n_dist)
axis_spacing = np.diff(axis_dist)[0] # bin width
counts = np.zeros([n_ca, n_ca, n_dist], dtype=np.float32)
counts += 1e-12 # add a small offset to avoid NaNs in later scoring
# distogram calculation
for i_frame in tqdm.tqdm(range(n_frames)):
weight, frame = weights[i_frame], t[i_frame]
xyz_ca = frame.xyz[0, ca_idx,: ]
dist = scipy.spatial.distance_matrix(xyz_ca, xyz_ca) * 10.0 # units in mdtraj in nm
# find bin
dist_bin = dist / axis_spacing
dist_bin_int = dist_bin.astype(dtype=np.int32)
counts = increment_counts(counts, dist_bin_int, weight)
fig, ax = plt.subplots()
sel_bin = 1 # distogram is multidim. array hard to visualize -> instead you can print 2D map at selected distance bin
im = plt.imshow(counts[:, :, sel_bin])
ax.set_xlabel(r'$C_{\alpha}\ $residue number')
ax.set_ylabel(r'$C_{\alpha}\ $residue number')
ax.set_title("distance bin = %.2f Å" % (axis_dist[sel_bin]))
plt.colorbar(im,orientation='vertical').set_label(label="weighted distance occupancy",size=10)
plt.savefig('occupancy_distance_bin_%.2fA.png'% (axis_dist[sel_bin]),dpi=300)
plt.close(fig)
# save distogramm
np.save("distogram.npy", counts)
np.save("distogram_axis.npy", axis_dist)
# Compute mean and SD and skewness of distogram
means = np.zeros((n_ca, n_ca), dtype=np.float32)
sds = np.zeros((n_ca, n_ca), dtype=np.float32)
skws = np.zeros((n_ca, n_ca), dtype=np.float32)
axis_dist2 = axis_dist**2.0
axis_dist3 = axis_dist**3.0
for i in range(n_ca):
for j in range(n_ca):
s = np.clip(np.sum(counts[i, j, :]), 1e-12, 1.0) # "normalize" histogram to 1
mean = np.dot(counts[i, j, :], axis_dist) / s
mean2 = np.dot(counts[i, j, :], axis_dist2) / s
mean3 = np.dot(counts[i, j, :], axis_dist3) / s
d2 = mean2 - mean**2.0
sd = np.sqrt(abs(d2))
skw = (mean3 - 3 * mean * sd**2.0 - mean**3.0) # / (sd**3.0) # removed normalization (numerical unstable)
means[i, j] = mean
sds[i, j] = sd
skws[i, j] = skw
r = np.tanh(abs(means / np.max(means)))
g = np.tanh(abs(sds / np.max(sds)))
b = np.tanh(abs(skws / np.max(skws)))
rgb = np.dstack((r,g,b))
fig, ax = plt.subplots()
im2 = plt.imshow(rgb)
ax.set_xlabel(r'$C_{\alpha}\ $residue number')
ax.set_ylabel(r'$C_{\alpha}\ $residue number')
plt.savefig('mu_sds_skew_of_distogram.png',dpi=300)
plt.close(fig)