-
Notifications
You must be signed in to change notification settings - Fork 5
/
gui.py
executable file
·387 lines (349 loc) · 15 KB
/
gui.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
# Import necessary libraries
import itertools
import os
import shlex
import subprocess
import sys
import time
import warnings
from pathlib import Path
import streamlit as st
from scripts.clustering_metrics import CLUSTERING_METRICS
from scripts.consensus_methods import CONSENSUS_METHODS
from scripts.docking_functions import DOCKING_PROGRAMS
from scripts.rescoring_functions import RESCORING_FUNCTIONS
# Suppress every DeprecationWarning for the lifetime of the app.
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

# Page-level configuration: browser tab title, page icon and wide layout.
st.set_page_config(
    page_title="DockM8 v1.0.0",
    page_icon="./media/DockM8_logo.png",
    layout="wide",
)

# Sidebar: branding first, then the external links in display order.
sidebar = st.sidebar
sidebar.image(image="./media/DockM8_white_horizontal.png", width=200)
sidebar.title("DockM8")
sidebar.subheader("Open-source consensus docking for everyone")
sidebar_links = (
    ("Github", "https://github.com/DrugBud-Suite/DockM8"),
    ("Visit Website", "https://drugbud-suite.github.io/dockm8-web/"),
    # NOTE(review): the two DOI targets below look like placeholders - confirm.
    ("Publication", "https://doi.org/your-doi"),
    ("Zenodo repository", "https://doi.org/your-doi"),
)
for link_label, link_url in sidebar_links:
    sidebar.link_button(link_label, url=link_url)

# Logo, drawn in the middle one of three columns.
logo_column = st.columns(3)[1]
logo_column.image(image="./media/DockM8_white_vertical.png", width=400)
# Setup
# CWD anchors every default path offered by the GUI and the dockm8.py launch path.
CWD = os.getcwd()
st.header("Setup", divider="orange")

# Docking mode: "Single" (one receptor) or "Ensemble" (several receptors).
mode = st.selectbox(
    label="Which mode do you want to run DockM8 in?",
    key=0,
    options=("Single", "Ensemble"),
    help="Single Docking: DockM8 will dock each ligand in the library to a single receptor. "
    + "Ensemble Docking: DockM8 will dock each ligand in the library to all specified receptors and then combine the results to create an ensemble consensus.",
)

# The consensus threshold is only offered - and `threshold` only defined -
# when the mode is not "Single".
if mode != "Single":
    threshold = st.slider(
        label="Threshold for ensemble consensus (in %)",
        min_value=0.0,
        max_value=10.0,
        step=0.1,
        value=1.0,
        help="Threshold for ensemble consensus (in %). DockM8 will only consider a ligand as a consensus hit if it is a top scorer for all the receptors.",
    )

# os.cpu_count() returns None when the count cannot be determined; fall back
# to a single CPU instead of failing on the arithmetic below.
available_cpus = os.cpu_count() or 1
if available_cpus > 1:
    num_cpus = st.slider(
        "Number of CPUs",
        min_value=1,
        max_value=available_cpus,
        step=1,
        # Default to roughly 90% of the CPUs, but never below the slider minimum.
        value=max(1, int(available_cpus * 0.9)),
        help="Number of CPUs to use for calculations",
    )
else:
    # With a single CPU there is nothing to choose, and a slider whose minimum
    # equals its maximum is degenerate, so skip the widget.
    num_cpus = 1

software = st.text_input(
    "Choose a software directory",
    value=CWD + "/software",
    help="Type the directory containing the software folder: For example: /home/user/Dockm8/software",
)
gen_decoys = st.toggle(
    label="Generate decoys",
    value=False,
    help="Generate decoys for the active ligands and determine optimal DockM8 conditions",
)

# Default input paths, pre-filled into the receptor / reference / library
# text inputs further down the page.
test_data_dir = CWD + "/dockm8_testing"
library_value = test_data_dir + "/library.sdf"
if mode == "Single":
    receptor_value = test_data_dir + "/4kd1_p.pdb"
    reference_value = test_data_dir + "/4kd1_l.sdf"
else:
    # Ensemble mode: comma-separated lists with one entry per receptor.
    receptor_value = test_data_dir + "/4kd1_p.pdb, " + test_data_dir + "/1fvv_p.pdb"
    reference_value = test_data_dir + "/4kd1_l.sdf, " + test_data_dir + "/1fvv_l.sdf"
# Decoy settings are only shown (and their variables only defined) when the
# "Generate decoys" toggle is on.
if gen_decoys:
    st.subheader("Decoy generation", divider="orange")

    # Active ligands
    default_actives_path = f"{CWD}/dockm8_testing/CDK2_actives.sdf"
    active_ligands = st.text_input(
        label="Enter the path to the active ligands file (.sdf format)",
        value=default_actives_path,
        help="Choose an active ligands file (.sdf format)",
    )

    # Number of decoys
    n_decoys = st.slider(
        label="Number of decoys",
        min_value=1,
        max_value=100,
        step=1,
        value=10,
        help="Number of decoys to generate for each active ligand",
    )

    # Decoy generation program
    decoy_model_choices = ("DUD-E", "DEKOIS", "DUD-E_phosphorus")
    decoy_model = st.selectbox(
        label="Which decoy generation model do you want to use?",
        options=decoy_model_choices,
        help="Select which Deepcoy decoy generation model you want to use ",
    )
# Two-column layout: receptor / pocket settings on the left (col1),
# ligand settings on the right (col2).
col1, col2 = st.columns(2)

# Receptor(s)
col1.header("Receptor(s)", divider="orange")
receptor_file = col1.text_input(
    label="File path(s) of one or more multiple receptor files (.pdb format), separated by commas",
    help="Choose one or multiple receptor files (.pdb format)",
    value=receptor_value,
    placeholder="Enter path(s) here",
)

# Prepare receptor
prepare_receptor = col1.toggle(
    label="Prepare receptor using Protoss",
    value=True,
    help="Choose whether or not to use Protoss Web service to protonate the protein structure",
)

# Pocket finding
col1.subheader("Pocket finding", divider="orange")
pocket_mode = col1.selectbox(
    label="How should the pocket be defined?",
    options=("Reference", "RoG", "Dogsitescorer", "Custom"),
    help="Reference Ligand: DockM8 will use the reference ligand to define the pocket. "
    + "Reference Ligand RoG: DockM8 will use the reference ligand radius of gyration. "
    + "DogSiteScorer: DockM8 will use the DogSiteScorer pocket finding algorithm to define the pocket. "
    + "Custom: Define your own pocket center and size coordinates.",
)

# Each pocket mode needs its own extra inputs; only the variables belonging to
# the selected mode are defined.
if pocket_mode in ("Reference", "RoG"):
    # Reference ligand
    reference_file = col1.text_input(
        label="File path(s) of one or more multiple reference ligand files (.sdf format), separated by commas",
        # The help text previously said ".pdb", contradicting the label and the default value.
        help="Choose one or multiple reference ligand files (.sdf format)",
        value=reference_value,
        placeholder="Enter path(s) here",
    )
elif pocket_mode == "Dogsitescorer":
    dss_mode = col1.selectbox(
        label="Choose method to select binding site",
        options=("Volume", "Druggability_Score", "Surface", "Depth"),
        help="Choose which DogSiteScorer metric to use to select the binding site. The site with the highest value of the chosen metric will be used.",
    )
elif pocket_mode == "Custom":
    if mode == "Single":
        # Centre inputs are created before size inputs so that each of the
        # three columns shows the centre field above the size field.
        ccol1, ccol2, ccol3 = col1.columns(3)
        x_center = ccol1.number_input(label="X Center", value=0.0, help="Enter the X coordinate of the pocket center")
        y_center = ccol2.number_input(label="Y Center", value=0.0, help="Enter the Y coordinate of the pocket center")
        z_center = ccol3.number_input(label="Z Center", value=0.0, help="Enter the Z coordinate of the pocket center")
        x_size = ccol1.number_input(label="X Size", value=20.0, help="Enter the size of the pocket in the X direction")
        y_size = ccol2.number_input(label="Y Size", value=20.0, help="Enter the size of the pocket in the Y direction")
        z_size = ccol3.number_input(label="Z Size", value=20.0, help="Enter the size of the pocket in the Z direction")
        pocket_coordinates = {
            "center": [x_center, y_center, z_center],
            "size": [x_size, y_size, z_size],
        }
    else:
        # No coordinates are collected here, so `pocket_coordinates` stays undefined.
        col1.error("Custom pocket definition does not currently work in ensemble mode, please change the pocket definition mode")
# Ligand library
col2.header("Ligands", divider="orange")
ligand_file = col2.text_input(
    # Label typo fixed ("Entre" -> "Enter").
    label="Enter the path to the ligand library file (.sdf format)",
    value=library_value,
    help="Choose a ligand library file (.sdf format)",
)

# ID column
id_column = col2.text_input(
    label="Choose the column name that contains the ID of the ligand",
    value="ID",
    help="Choose the column name that contains the ID of the ligand",
)

# Ligand conformers (index=1 preselects "GypsumDL")
col2.subheader("Ligand conformers", divider="orange")
ligand_conformers = col2.selectbox(
    label="How should the conformers be generated?",
    options=["MMFF", "GypsumDL"],
    index=1,
    help="MMFF: DockM8 will use MMFF to prepare the ligand 3D conformers. "
    + "GypsumDL: DockM8 will use Gypsum-DL to prepare the ligand 3D conformers.",
)

# Ligand protonation (index=1 preselects "GypsumDL")
col2.subheader("Ligand protonation", divider="orange")
ligand_protonation = col2.selectbox(
    label="How should the ligands be protonated?",
    options=("None", "GypsumDL"),
    index=1,
    help="None: No protonation "
    + "Gypsum-DL: DockM8 will use Gypsum-DL to protonate the ligands",
)
# Docking programs
st.header("Docking programs", divider="orange")
docking_programs = st.multiselect(
    label="Choose the docking programs you want to use",
    default=["GNINA"],
    options=DOCKING_PROGRAMS,
    help="Choose the docking programs you want to use, multiple selection is allowed",
)

# Look for PLANTS inside the software directory chosen by the user. The check
# previously tested a fixed placeholder path, so it ignored that choice.
plants_path = os.path.join(software, "PLANTS")
if "PLANTS" in docking_programs and not os.path.exists(plants_path):
    st.warning(
        'PLANTS was not found in the software folder, please visit http://www.tcd.uni-konstanz.de/research/plants.php',
        # st.warning expects an emoji character here, not a ":shortcode:".
        icon="⚠️",
    )

# Number of poses
nposes = st.slider(
    label="Number of poses",
    min_value=1,
    max_value=100,
    step=5,
    value=10,
    help="Number of poses to generate for each ligand",
)

# Exhaustiveness
exhaustiveness = st.select_slider(
    label="Exhaustiveness",
    options=[1, 2, 4, 8, 16, 32],
    value=8,
    help="Exhaustiveness of the docking, only applies to GNINA, SMINA, QVINA2 and QVINAW. Higher values can significantly increase the runtime.",
)

# Optional pose filtering with PoseBusters
bust_poses = st.checkbox(
    label="Bust poses using PoseBusters : WARNING may take a long time to run",
    value=False,
    help="Bust poses using PoseBusters : Will remove any poses with clashes, non-flat aromatic rings etc. WARNING may take a long time to run",
)
# Pose selection
st.header("Pose Selection", divider="orange")

# Selectable methods: clustering metrics, then the "bestpose" variants, then
# every rescoring function.
pose_selection_options = [
    *CLUSTERING_METRICS.keys(),
    "bestpose",
    "bestpose_GNINA",
    "bestpose_SMINA",
    "bestpose_PLANTS",
    "bestpose_QVINA2",
    "bestpose_QVINAW",
    *RESCORING_FUNCTIONS.keys(),
]
pose_selection_help = (
    "The method(s) to use for pose clustering. Must be one or more of:\n"
    "- RMSD : Cluster compounds on RMSD matrix of poses \n"
    "- spyRMSD : Cluster compounds on symmetry-corrected RMSD matrix of poses\n"
    "- espsim : Cluster compounds on electrostatic shape similarity matrix of poses\n"
    "- USRCAT : Cluster compounds on shape similarity matrix of poses\n"
    "- 3DScore : Selects pose with the lowest average RMSD to all other poses\n"
    "- bestpose : Takes the best pose from each docking program\n"
    "- bestpose_GNINA : Takes the best pose from GNINA docking program\n"
    "- bestpose_SMINA : Takes the best pose from SMINA docking program\n"
    "- bestpose_QVINAW : Takes the best pose from QVINAW docking program\n"
    "- bestpose_QVINA2 : Takes the best pose from QVINA2 docking program\n"
    "- bestpose_PLANTS : Takes the best pose from PLANTS docking program \n"
    "- You can also use any of the scoring functions and DockM8 will select the best pose for each compound according to the specified scoring function."
)
pose_selection = st.multiselect(
    label="Choose the pose selection method you want to use",
    default=["KORP-PL"],
    options=pose_selection_options,
    help=pose_selection_help,
)

# Clustering algorithm: only asked for when at least one selected method is a
# clustering metric; otherwise it stays None.
clustering_algorithm = None
if any(method in CLUSTERING_METRICS.keys() for method in pose_selection):
    clustering_algorithm = st.selectbox(
        label="Which clustering algorithm do you want to use?",
        options=("KMedoids", "Aff_Prop"),
        index=0,
        help='Which algorithm to use for clustering. Must be one of "KMedoids", "Aff_prop". Must be set when using "RMSD", "spyRMSD", "espsim", "USRCAT" clustering metrics.',
    )
# Rescoring
st.header("Scoring functions", divider="orange")
scoring_function_names = list(RESCORING_FUNCTIONS.keys())
rescoring = st.multiselect(
    label="Choose the scoring functions you want to use",
    default=["CNN-Score", "KORP-PL"],
    options=scoring_function_names,
    help="The method(s) to use for scoring. Multiple selection allowed",
)

# Consensus
st.header("Consensus", divider="orange")
consensus_method_names = list(CONSENSUS_METHODS.keys())
consensus_method = st.selectbox(
    label="Choose which consensus algorithm to use: ",
    # NOTE(review): preselects the 10th entry, so this assumes CONSENSUS_METHODS
    # has at least ten keys - confirm against scripts.consensus_methods.
    index=9,
    options=consensus_method_names,
    help="The method to use for consensus.",
)
# With decoy generation enabled, estimate how many combinations of consensus
# method, pose selection method and scoring-function set are implied by the
# current selection, and warn when the number is large.
if gen_decoys:
    # Counts scoring-function subsets of size 2 up to len(rescoring) - 1.
    # NOTE(review): the subset containing every selected scoring function is
    # not counted - confirm this matches what the backend evaluates.
    total_combinations = sum(
        len(list(itertools.combinations(rescoring, subset_size)))
        for subset_size in range(2, len(rescoring))
    )
    scoring_sets = len(rescoring) + total_combinations
    num_possibilities = len(CONSENSUS_METHODS.keys()) * len(pose_selection) * scoring_sets
    if num_possibilities > 10000:
        st.warning(
            f"WARNING: The combination of scoring functions and pose selection method you have selected will yield a large number of possible combinations ({num_possibilities}). This may take a long time to run."
        )
# Assemble the dockm8.py command line from the widget values. The result is a
# single string that is split into an argument list when DockM8 is launched.
# NOTE(review): values are inserted unquoted, so a path containing spaces ends
# up as several arguments after splitting - confirm how dockm8.py expects them.
docking_programs_arg = " ".join(docking_programs)
pose_selection_arg = " ".join(pose_selection)
rescoring_arg = " ".join(rescoring)
command = " ".join([
    f"{sys.executable} {CWD}/dockm8.py",
    f"--software {software}",
    f"--receptor {receptor_file}",
    f"--docking_library {ligand_file}",
    f"--idcolumn {id_column}",
    f"--prepare_proteins {prepare_receptor}",
    f"--conformers {ligand_conformers}",
    f"--protonation {ligand_protonation}",
    f"--docking_programs {docking_programs_arg}",
    f"--bust_poses {bust_poses}",
    f"--pose_selection {pose_selection_arg}",
    f"--nposes {nposes}",
    f"--exhaustiveness {exhaustiveness}",
    f"--ncpus {num_cpus}",
    f"--clustering_method {clustering_algorithm}",
    f"--rescoring {rescoring_arg}",
    f"--consensus {consensus_method}",
])

# Add pocket-specific arguments
if pocket_mode == "Custom":
    if mode != "Single":
        # Pocket coordinates are only collected in Single mode; without them
        # no valid command can be built, so stop rendering here instead of
        # failing on the undefined `pocket_coordinates`.
        st.error("DockM8 cannot be launched: custom pocket definition is not available in ensemble mode. Please choose another pocket definition mode.")
        st.stop()
    # Encoded as "center:x,y,z*size:x,y,z".
    pocket_str = "*".join(
        f"{name}:{','.join(map(str, values))}"
        for name, values in pocket_coordinates.items()
    )
    command += f" --pocket {pocket_str}"
elif pocket_mode == "Reference" or pocket_mode == "RoG":
    command += f" --pocket {pocket_mode}"
    command += f" --reffile {reference_file}"
elif pocket_mode == "Dogsitescorer":
    command += f" --pocket {pocket_mode}"
    command += f" --dogsitescorer_mode {dss_mode}"

# Add mode-specific arguments
command += f" --mode {mode}"
if mode != "Single":
    # `threshold` exists exactly when the mode is not "Single". The previous
    # check compared against lowercase "ensemble", which the mode selector
    # never produces, so the threshold was never passed on.
    command += f" --threshold {threshold}"

# Decoy generation arguments
if gen_decoys:
    command += (" --gen_decoys True "
                f"--decoy_model {decoy_model} "
                f"--n_decoys {n_decoys} "
                f"--actives {active_ligands} ")
# Create log.txt in the working directory if it is missing and empty it otherwise.
with open("log.txt", "w"):
    pass
def run_dockm8(command_list):
    """Start DockM8 as a child process without waiting for it to finish.

    Args:
        command_list: Program and arguments as a sequence of strings, passed
            unchanged to ``subprocess.Popen``.

    Returns:
        The ``subprocess.Popen`` handle of the started process, so the caller
        can poll it for completion or read its return code. Callers that
        ignore the return value keep working as before.
    """
    print("Running")
    return subprocess.Popen(command_list)
def read_log_file(file_path):
    """Return the entire text content of the file at ``file_path``."""
    with open(file_path) as log_handle:
        return log_handle.read()
# Run the script file
if st.button("Run DockM8"):
    log_label = "Log (if inactive for a long time, please check your terminal from which you launched the GUI, any errors will be there)"

    command_list = shlex.split(command)
    # The launcher may or may not hand back a process handle; both are handled below.
    process = run_dockm8(command_list)

    log_file_path = Path(CWD, "log.txt")
    log_content = read_log_file(log_file_path)

    # Create an empty container for dynamic content updates
    log_container = st.empty()
    # Display initial log content
    log_container.text_area(log_label, log_content, height=300)

    # Periodically check for changes in the log file. When a process handle is
    # available the loop ends once the process has exited; without a handle it
    # keeps polling indefinitely, as before.
    while True:
        # Check for completion before reading, so that the last read happens
        # after the process has exited and picks up its final output.
        finished = process is not None and process.poll() is not None
        if not finished:
            time.sleep(1)  # Adjust the interval as needed
        new_log_content = read_log_file(log_file_path)
        if new_log_content != log_content:
            # Update the contents of the existing text area
            log_container.text_area(log_label, new_log_content, height=300)
            log_content = new_log_content
        if finished:
            break