[feat] new version for pdb2pdbqt (#117)

* for receptor pdb convert to pdbqt * changed unidock.py * del docking_grids_generator_old.py and schrodinger_atom_types.py, changed protein_pdbqt_writer.py * del protein_prep/pdb2pdbqt.py * changed import path * changed test_receptor_processor.py * changed test_proprep.py and for mcdock.py del pdb2pdbqt add receptor_processor * [bugfix] escape some pyright warnings and fix ad4 option parsing in autogrid runner * [feature] rename file names for templates * [bugfix] add missing import * [bugfix] add required dependencies * pyproject.toml: add some dir; mcdock.py,unidock_pipeline.py: Path-->str * unidock.py: del line 37 * [bugfix] ad4 map path * [feature] receptor preprocessor options, deprecated PDBQT file format as receptor input * [feature] dependencies * add_argument * change receptor_preprocessor_runner.py args.covalent_residue_atom_info * [feature] remove autogrid runner usage * [bugfix] autogrid runner remove * [feature] ad4 map unit test * [bugfix] receptor processor option * [bugfix] pocket option * [bugfix] receptor option * [bugfix] mcdock test * [bugfix] ut option * changed : residue chain id; or atom type; or alternative position * [bugfix] mcdock args * [bugfix] unidock pipeline option * [bugfix] update test case pdb * [feature] update covalent receptor preparation for watvina/unidock style covalent docking * fix:pyright check; build:openmm with cuda-version --------- Co-authored-by: Hong-Rui Lin <[email protected]> Co-authored-by: Hong-Rui Lin <[email protected]> Co-authored-by: dp-yuanyn <[email protected]>
dptech-corp · Aug 6, 2024 · e98f756 · e98f756
1 parent 424c4c3
commit e98f756
Show file tree

Hide file tree

Showing 70 changed files with 6,602,876 additions and 21,669 deletions.
diff --git a/unidock_tools/Dockerfile b/unidock_tools/Dockerfile
@@ -20,9 +20,8 @@ RUN wget --quiet -O CDPKit.sh https://github.com/molinfo-vienna/CDPKit/releases/
     rm CDPKit.sh
 
 WORKDIR /opt
-RUN mamba install -y ipython requests tqdm python-lmdb openbabel ambertools openmm -c conda-forge
-RUN mamba create -y -n mgltools mgltools autogrid -c bioconda
-ENV PATH $PATH:/opt/conda/envs/mgltools/bin
+
+RUN mamba install -y ipython requests tqdm python-lmdb openbabel ambertools openmm cuda-version=12.0 -c conda-forge
 
 COPY . /opt/unidock_tools
 RUN cd /opt/unidock_tools && \

diff --git a/unidock_tools/pyproject.toml b/unidock_tools/pyproject.toml
@@ -9,7 +9,9 @@ version = "0.0.1"
 keywords = [ "Docking" ]
 authors = [
     { name = "Yannan Yuan", email = "[email protected]" },
-    { name = "Hang Zheng", email = "[email protected]" }
+    { name = "Hang Zheng", email = "[email protected]" },
+    { name = "Hong-Rui Lin", email = "[email protected]" },
+    { name = "Pengli Zhang", email = "[email protected]" }
 ]
 description = "Several docking-related applications based on Uni-Dock."
 readme = "README.md"
@@ -31,7 +33,8 @@ classifiers = [
 ]
 
 [tool.setuptools.package-data]
-"*" = ["*.dat"]
+"*" = ["*/templates/*", "*/receptor_topology/bin/*","*/receptor_topology/data/*"]
+
 
 [project.urls]
 Homepage = "https://github.com/dptech-corp/Uni-Dock"
@@ -55,4 +58,5 @@ reportMissingImports = false
 reportMissingModuleSource = false
 reportAttributeAccessIssue = false
 reportIncompatibleMethodOverride = false
-reportCallIssue = false
+reportCallIssue = false
+reportArgumentType = false
diff --git a/unidock_tools/src/unidock_tools/application/mcdock.py b/unidock_tools/src/unidock_tools/application/mcdock.py
@@ -1,4 +1,4 @@
-from typing import List, Union, Tuple
+from typing import List, Tuple, Union, Optional
 from pathlib import Path
 import os
 import time
@@ -10,7 +10,7 @@
 
 from unidock_tools.utils import time_logger, randstr, make_tmp_dir, MolGroup
 from unidock_tools.modules.confgen import generate_conf
-from unidock_tools.modules.protein_prep import pdb2pdbqt
+from unidock_tools.modules.protein_prep import receptor_preprocessor
 from unidock_tools.modules.ligand_prep import TopologyBuilder
 from unidock_tools.modules.docking import run_unidock
 from .unidock_pipeline import Base, UniDock
@@ -61,6 +61,11 @@ def __init__(self,
                  size_x: float = 22.5,
                  size_y: float = 22.5,
                  size_z: float = 22.5,
+                 kept_ligand_resname_list: Optional[List[str]] = None,
+                 prepared_hydrogen: bool = True,
+                 preserve_original_resname: bool = True,
+                 covalent_residue_atom_info_list: Optional[List[Tuple[str, str]]] = None,
+                 generate_ad4_grids: bool = False,
                  gen_conf: bool = True,
                  max_nconf: int = 1000,
                  min_rmsd: float = 0.5,
@@ -70,7 +75,7 @@ def __init__(self,
         Initializes a MultiConfDock object.
 
         Args:
-            receptor (Path): Path to the receptor file in pdbqt format.
+            receptor (Path): Path to the receptor file in PDB format.
             ligands (List[Path]): List of paths to the ligand files in sdf format.
             center_x (float): X-coordinate of the center of the docking box.
             center_y (float): Y-coordinate of the center of the docking box.
@@ -84,11 +89,22 @@ def __init__(self,
         self.workdir = workdir
         self.workdir.mkdir(parents=True, exist_ok=True)
         if receptor.suffix == ".pdb":
-            pdb2pdbqt(receptor, workdir.joinpath(receptor.stem + ".pdbqt"))
-            receptor = workdir.joinpath(receptor.stem + ".pdbqt")
-        if receptor.suffix != ".pdbqt":
-            logging.error("receptor file must be pdb/pdbqt format")
+            receptor_pdbqt_file_name, protein_grid_prefix = receptor_preprocessor(str(receptor),
+                                                                                  kept_ligand_resname_list=kept_ligand_resname_list,
+                                                                                  prepared_hydrogen=prepared_hydrogen,
+                                                                                  preserve_original_resname=preserve_original_resname,
+                                                                                  target_center=(center_x, center_y, center_z),
+                                                                                  box_size=(size_x, size_y, size_z),
+                                                                                  covalent_residue_atom_info_list=covalent_residue_atom_info_list,
+                                                                                  generate_ad4_grids=generate_ad4_grids,
+                                                                                  working_dir_name=str(workdir))
+
+            self.receptor = receptor_pdbqt_file_name
+            self.ad4_map_prefix = protein_grid_prefix
+        else:
+            logging.error("receptor file must be PDB format!!")
             exit(1)
+
         self.receptor = receptor
         self.mol_group = MolGroup(ligands)
         self.build_topology()
@@ -194,7 +210,7 @@ def run_unidock(self,
                 receptor=self.receptor, ligands=ligand_list, output_dir=output_dir,
                 center_x=self.center_x, center_y=self.center_y, center_z=self.center_z,
                 size_x=self.size_x, size_y=self.size_y, size_z=self.size_z,
-                scoring=scoring_function, num_modes=num_modes,
+                scoring=scoring_function, ad4_map_prefix=self.ad4_map_prefix, num_modes=num_modes,
                 search_mode=search_mode, exhaustiveness=exhaustiveness, max_step=max_step, 
                 seed=seed, refine_step=refine_step, energy_range=energy_range,
                 score_only=score_only, local_only=local_only,

diff --git a/unidock_tools/src/unidock_tools/application/proteinprep.py b/unidock_tools/src/unidock_tools/application/proteinprep.py
@@ -1,27 +1,66 @@
-import logging
 import argparse
-from unidock_tools.modules.protein_prep import pdb2pdbqt
-
+import logging
+import shutil
+import os
+from unidock_tools.modules.protein_prep import receptor_preprocessor
+from typing import List, Tuple, Dict, Optional
 
 def main(args: dict):
-    pdb2pdbqt(args["receptor_file"], args["output_file"])
+
+    def parse_covalent_residue_atom_info(covalent_residue_atom_info_str: str) -> List[List[Tuple[str, str, int, str]]]:
+        residue_info_list = []
+        residue_atoms = covalent_residue_atom_info_str.split(',')
+        for residue_atom in residue_atoms:
+            residue_info = residue_atom.strip().split()
+            chain_id, residue_name, residue_number, atom_name = residue_info
+            residue_info_list.append((chain_id, residue_name, int(residue_number), atom_name))
+        return residue_info_list
+
+    protein_pdbqt_file_name, protein_grid_prefix = receptor_preprocessor(
+        protein_pdb_file_name=args['protein_pdb'],
+        kept_ligand_resname_list=args['kept_ligand_resname_list'],
+        prepared_hydrogen=args['prepared_hydrogen'],
+        preserve_original_resname=args['preserve_resname'],
+        target_center=tuple(args['target_center']),
+        box_size=tuple(args['box_size']),
+        generate_ad4_grids=args['generate_grids'],
+        covalent_residue_atom_info_list = parse_covalent_residue_atom_info(args['covalent_residue_atom_info']) if args['covalent_residue_atom_info'] is not None else None,
+        working_dir_name=args['working_dir']
+    )
 
+    protein_pdbqt_dst = os.path.join(args['working_dir'], args['protein_pdbqt'])
+    shutil.copy(protein_pdbqt_file_name, protein_pdbqt_dst)
 
-def get_parser() -> argparse.ArgumentParser:
-    parser = argparse.ArgumentParser()
-    parser.add_argument("-r", "--receptor_file", type=str, required=True,
-                        help="Input receptor file in PDB format")
-    parser.add_argument("-o", "--output_file", type=str, default="output.pdbqt",
-                        help="Output file in PDBQT format")
+def get_parser():
+    parser = argparse.ArgumentParser(description="Receptor Preprocessor")
+    parser.add_argument("-r", "--protein_pdb", type=str, required=True,
+                        help="protein PDB file name")
+    parser.add_argument("-kr", "--kept_ligand_resname_list", type=str, nargs="+", default=None,
+                        help="list of ligand residue names to keep. To use it like this: -kr Lig1 Lig2 ")
+    parser.add_argument("-ph", "--prepared_hydrogen", action="store_false",
+                        help="prepare hydrogen atoms")
+    parser.add_argument("-pr", "--preserve_resname", action="store_false",
+                        help="preserve original residue names")
+    parser.add_argument("-c", "--target_center", nargs=3, type=float, default=[0.0, 0.0, 0.0],
+                        help="target center coordinates (x, y, z)")
+    parser.add_argument("-s", "--box_size", nargs=3, type=float, default=[22.5, 22.5, 22.5],
+                        help="box size")
+    parser.add_argument("-g", "--generate_grids", action="store_true",
+                        help="generate AD4 grids")
+    parser.add_argument("-cra", "--covalent_residue_atom_info", type=str, default=None,
+                        help="Atom information for covalent residues during receptor preprocessing. To use it like this: -cra 'A VAL 1 CA, A VAL 1 CB, A VAL 1 O'((chain_id, residue_name, residue_number, atom_name)")
+    parser.add_argument("-wd", "--working_dir", type=str, default=".",
+                        help="working directory")
+    parser.add_argument("-o", "--protein_pdbqt", type=str, required=True,
+                        help="protein PDBQT file name")
     return parser
 
-
 def main_cli():
     parser = get_parser()
-    args = parser.parse_args().__dict__
-    logging.info(f"[Params] {args}")
-    main(args)
+    args = vars(parser.parse_args())
 
+    logging.info(f"Running receptor_preprocessor with args: {args}")
+    main(args)
 
 if __name__ == "__main__":
-    main_cli()
+    main_cli()