From 77433ea3a1cd6a28351a2af6b06eeb46f7ab7a40 Mon Sep 17 00:00:00 2001 From: Martin Buttenschoen <42468905+maabuu@users.noreply.github.com> Date: Mon, 10 Jun 2024 12:15:02 +0100 Subject: [PATCH] Fix passing mol objects to bust bug (#40) API: - Fix bug identified by Guy Durant: passing molecule objects into bust failed SuCOS module: - Update documentation and add reference --- .vscode/launch.json | 19 +++++++++ posebusters/__init__.py | 2 +- posebusters/modules/sucos.py | 37 ++++++++++++------ posebusters/posebusters.py | 4 +- posebusters/tools/loading.py | 3 ++ tests/conftest.py | 10 +++++ .../7BRV_F5R_7WB6_F5R_larger_ligand.sdf | 39 +++++++++++++++++++ .../7BRV_F5R_7WB6_F5R_smaller_ligand.sdf | 25 ++++++++++++ tests/test_modules/test_sucos.py | 19 ++++++++- tests/test_posebusters.py | 15 ++++++- 10 files changed, 156 insertions(+), 17 deletions(-) create mode 100644 tests/conftest/7BRV_F5R_7WB6_F5R/7BRV_F5R_7WB6_F5R_larger_ligand.sdf create mode 100644 tests/conftest/7BRV_F5R_7WB6_F5R/7BRV_F5R_7WB6_F5R_smaller_ligand.sdf diff --git a/.vscode/launch.json b/.vscode/launch.json index 5cbf33c..6d3cc53 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -249,5 +249,24 @@ "long", ] }, + { + "name": "bust -- memory problem", + "type": "debugpy", + "request": "launch", + "cwd": "${workspaceFolder}", + "module": "posebusters", + "justMyCode": true, + "stopOnEntry": false, + "args": [ + "tests/conftest/1W1P_GIO/1W1P_GIO_ligand.sdf", + "-l", + "tests/conftest/1W1P_GIO/1W1P_GIO_ligands.sdf", + "-p", + "tests/conftest/1W1P_GIO/1W1P_GIO_protein.pdb", + "--full-report", + "--outfmt", + "long", + ] + }, ] } \ No newline at end of file diff --git a/posebusters/__init__.py b/posebusters/__init__.py index 1a75554..9dfbc58 100644 --- a/posebusters/__init__.py +++ b/posebusters/__init__.py @@ -24,4 +24,4 @@ "check_volume_overlap", ] -__version__ = "0.2.13" +__version__ = "0.2.14" diff --git a/posebusters/modules/sucos.py b/posebusters/modules/sucos.py index 14a37bb..ca5026f 100644 --- a/posebusters/modules/sucos.py +++ b/posebusters/modules/sucos.py @@ -1,4 +1,5 @@ """Module to calculate SuCOS score.""" + from __future__ import annotations import os @@ -8,7 +9,7 @@ from rdkit.Chem.FeatMaps import FeatMaps from rdkit.Chem.rdchem import Mol from rdkit.Chem.rdMolChemicalFeatures import BuildFeatureFactory -from rdkit.Chem.rdmolops import SanitizeMol +from rdkit.Chem.rdmolops import AddHs, RemoveHs, SanitizeMol from rdkit.Chem.rdShapeHelpers import ShapeProtrudeDist FACTORY = BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, "BaseFeatures.fdef")) @@ -30,8 +31,7 @@ def get_feature_map_score( mol_large: Mol, conf_id_small: int = -1, conf_id_large: int = -1, - score_mode=FeatMaps.FeatMapScoreMode.Best, -): +) -> float: """Calculate the feature map score between two molecules. Good introduction: @@ -44,7 +44,7 @@ def get_feature_map_score( # feature map based on small molecule feature_map = FeatMaps.FeatMap(feats=features_small, weights=[1] * len(features_small), params=PARAMETERS) - feature_map.scoreMode = score_mode + feature_map.scoreMode = FeatMaps.FeatMapScoreMode.Best # score feature in large molecule present in small molecule feature_map_score = feature_map.ScoreFeats(features_large) / min(feature_map.GetNumFeatures(), len(features_large)) @@ -55,9 +55,9 @@ def get_feature_map_score( def get_sucos_score( mol_reference: Mol, mol_probe: Mol, - score_mode=FeatMaps.FeatMapScoreMode.Best, conf_id_reference: int = -1, conf_id_probe: int = -1, + heavy_only: bool | None = True, ) -> float: """Calculate the SuCOS score between a reference ligand and a list of probe ligands. @@ -69,15 +69,19 @@ def get_sucos_score( SuCOS score. Notes: + SuCOS described in https://chemrxiv.org/engage/chemrxiv/article-details/60c741a99abda23230f8bed5 Adapted from https://github.com/MarcMoesser/SuCOS/blob/master/calc_SuCOS_normalized.py """ + # explicit or implicit hydrogens should be same for both molecules + mol_reference = handle_hydrogens(mol_reference, heavy_only=heavy_only) + mol_probe = handle_hydrogens(mol_probe, heavy_only=heavy_only) + feature_map_score = get_feature_map_score( mol_small=mol_reference, mol_large=mol_probe, conf_id_small=conf_id_reference, conf_id_large=conf_id_probe, - score_mode=score_mode, ) protrusion_distance = ShapeProtrudeDist( mol1=mol_reference, @@ -94,14 +98,14 @@ def get_sucos_score( def check_sucos(mol_pred: Mol, mol_true: Mol, sucos_threshold: float = 0.4) -> dict[str, dict[str, bool | float]]: - """Calculate RMSD and related metrics between predicted molecule and closest ground truth molecule. + """Calculate SuCOS and related metrics between predicted molecule and closest ground truth molecule. Args: mol_pred: Predicted molecule (docked ligand) with exactly one conformer. mol_true: Ground truth molecule (crystal ligand) with at least one conformer. If multiple conformers are - present, the lowest RMSD will be reported. - rmsd_threshold: Threshold in angstrom for reporting whether RMSD is within threshold. Defaults to 2.0. - heavy_only: Whether to only consider heavy atoms for RMSD calculation. Defaults to True. + present, the highest SuCOS will be reported. + sucos_threshold: Threshold in angstrom for reporting whether SuCOS is within threshold. Defaults to 0.4. + heavy_only: Whether to only consider heavy atoms for SuCOS calculation. Defaults to True. Returns: PoseBusters results dictionary. @@ -109,8 +113,8 @@ def check_sucos(mol_pred: Mol, mol_true: Mol, sucos_threshold: float = 0.4) -> d assert isinstance(mol_true, Mol), "Ground truth molecule is missing." assert isinstance(mol_pred, Mol), "Predicted molecule is missing." num_conf = mol_true.GetNumConformers() - assert num_conf > 0, "Crystal ligand needs at least one conformer." - assert mol_pred.GetNumConformers() == 1, "Docked ligand should only have one conformer." + assert num_conf > 0, "Ground truth molecule needs at least one conformer." + assert mol_pred.GetNumConformers() == 1, "Predicted molecule should only have one conformer." try: SanitizeMol(mol_pred) @@ -125,3 +129,12 @@ def check_sucos(mol_pred: Mol, mol_true: Mol, sucos_threshold: float = 0.4) -> d results = {"sucos": best_sucos, "sucos_within_threshold": sucos_within_threshold} return {"results": results} + + +def handle_hydrogens(mol: Mol, heavy_only: bool | None = True) -> Mol: + """Remove, add or do not modify hydrogens in a molecule.""" + if heavy_only is None: + return mol + if heavy_only: + return RemoveHs(mol) + return AddHs(mol, addCoords=True) diff --git a/posebusters/posebusters.py b/posebusters/posebusters.py index 73df37c..04ece3a 100644 --- a/posebusters/posebusters.py +++ b/posebusters/posebusters.py @@ -93,10 +93,10 @@ def bust( Returns: Pandas dataframe with results. """ - mol_pred = [mol_pred] if isinstance(mol_pred, (Mol, Path, str)) else mol_pred + mol_pred_list: Iterable[Mol | Path | str] = [mol_pred] if isinstance(mol_pred, (Mol, Path, str)) else mol_pred columns = ["mol_pred", "mol_true", "mol_cond"] - self.file_paths = pd.DataFrame([[mol_pred, mol_true, mol_cond] for mol_pred in mol_pred], columns=columns) + self.file_paths = pd.DataFrame([[mol_pred, mol_true, mol_cond] for mol_pred in mol_pred_list], columns=columns) results_gen = self._run() diff --git a/posebusters/tools/loading.py b/posebusters/tools/loading.py index 606138f..2471c71 100644 --- a/posebusters/tools/loading.py +++ b/posebusters/tools/loading.py @@ -34,6 +34,9 @@ def safe_load_mol(path: Path, load_all: bool = False, **load_params) -> Mol | No Returns: Molecule object or None if loading failed. """ + if isinstance(path, Mol): + return path + try: path = _check_path(path) with CaptureLogger(): diff --git a/tests/conftest.py b/tests/conftest.py index bf4d259..fa0e468 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -185,3 +185,13 @@ def mol_cond_7ztl_bcn(): @pytest.fixture def mol_disconnnected_atoms(): return MolFromMolFile("tests/conftest/mol_disconnected_atoms.sdf", sanitize=True) + + +@pytest.fixture +def mol_small_7brv_f5r_7wb6_f5r(): + return MolFromMolFile("tests/conftest/7BRV_F5R_7WB6_F5R/7BRV_F5R_7WB6_F5R_smaller_ligand.sdf", removeHs=True) + + +@pytest.fixture +def mol_large_7brv_f5r_7wb6_f5r(): + return MolFromMolFile("tests/conftest/7BRV_F5R_7WB6_F5R/7BRV_F5R_7WB6_F5R_larger_ligand.sdf", removeHs=True) diff --git a/tests/conftest/7BRV_F5R_7WB6_F5R/7BRV_F5R_7WB6_F5R_larger_ligand.sdf b/tests/conftest/7BRV_F5R_7WB6_F5R/7BRV_F5R_7WB6_F5R_larger_ligand.sdf new file mode 100644 index 0000000..d320f8e --- /dev/null +++ b/tests/conftest/7BRV_F5R_7WB6_F5R/7BRV_F5R_7WB6_F5R_larger_ligand.sdf @@ -0,0 +1,39 @@ +right_molecule_with_hydrogens + RDKit 3D + + 17 17 0 0 0 0 0 0 0 0999 V2000 + 57.8140 43.3950 19.4830 N 0 0 0 0 0 0 0 0 0 0 0 0 + 55.9570 43.8700 15.0390 C 0 0 0 0 0 0 0 0 0 0 0 0 + 56.2270 43.1270 16.1730 C 0 0 0 0 0 0 0 0 0 0 0 0 + 56.8350 43.7120 17.2820 C 0 0 0 0 0 0 0 0 0 0 0 0 + 57.1250 42.9160 18.5000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 57.1620 45.0630 17.2290 C 0 0 0 0 0 0 0 0 0 0 0 0 + 56.8990 45.8100 16.0990 C 0 0 0 0 0 0 0 0 0 0 0 0 + 56.3030 45.2030 15.0180 C 0 0 0 0 0 0 0 0 0 0 0 0 + 56.6450 41.6610 18.5400 N 0 0 0 0 0 0 0 0 0 0 0 0 + 55.9500 46.2400 13.4670 Br 0 0 0 0 0 0 0 0 0 0 0 0 + 58.1060 44.1550 19.3610 H 0 0 0 0 0 0 0 0 0 0 0 0 + 55.5430 43.4720 14.2930 H 0 0 0 0 0 0 0 0 0 0 0 0 + 55.9960 42.2150 16.1950 H 0 0 0 0 0 0 0 0 0 0 0 0 + 57.5680 45.4740 17.9720 H 0 0 0 0 0 0 0 0 0 0 0 0 + 57.1240 46.7240 16.0670 H 0 0 0 0 0 0 0 0 0 0 0 0 + 56.1730 41.3460 17.8580 H 0 0 0 0 0 0 0 0 0 0 0 0 + 56.8010 41.1510 19.2480 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 2 3 + 1 11 1 0 + 2 3 2 0 + 2 8 1 0 + 2 12 1 0 + 3 4 1 0 + 3 13 1 0 + 4 5 1 0 + 4 6 2 0 + 5 9 1 0 + 6 7 1 0 + 6 14 1 0 + 7 8 2 0 + 7 15 1 0 + 8 10 1 0 + 9 16 1 0 + 9 17 1 0 +M END diff --git a/tests/conftest/7BRV_F5R_7WB6_F5R/7BRV_F5R_7WB6_F5R_smaller_ligand.sdf b/tests/conftest/7BRV_F5R_7WB6_F5R/7BRV_F5R_7WB6_F5R_smaller_ligand.sdf new file mode 100644 index 0000000..e1441e6 --- /dev/null +++ b/tests/conftest/7BRV_F5R_7WB6_F5R/7BRV_F5R_7WB6_F5R_smaller_ligand.sdf @@ -0,0 +1,25 @@ +left_molecule_without_hydrogens + RDKit 3D + + 10 10 0 0 0 0 0 0 0 0999 V2000 + 57.7298 43.4513 19.4458 N 0 0 0 0 0 0 0 0 0 0 0 0 + 56.8728 45.7841 16.0476 C 0 0 0 0 0 0 0 0 0 0 0 0 + 57.1267 45.0725 17.1867 C 0 0 0 0 0 0 0 0 0 0 0 0 + 56.8325 43.7291 17.2368 C 0 0 0 0 0 0 0 0 0 0 0 0 + 57.0936 42.9364 18.4399 C 0 0 0 0 0 0 0 0 0 0 0 0 + 56.1174 43.1560 16.2069 C 0 0 0 0 0 0 0 0 0 0 0 0 + 55.8545 43.8496 15.0648 C 0 0 0 0 0 0 0 0 0 0 0 0 + 56.2497 45.1542 15.0115 C 0 0 0 0 0 0 0 0 0 0 0 0 + 56.7194 41.6618 18.4761 N 0 0 0 0 0 0 0 0 0 0 0 0 + 55.9070 46.2021 13.4738 Br 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 2 3 + 2 3 2 0 + 2 8 1 0 + 3 4 1 0 + 4 5 1 0 + 4 6 2 0 + 5 9 1 0 + 6 7 1 0 + 7 8 2 0 + 8 10 1 0 +M END diff --git a/tests/test_modules/test_sucos.py b/tests/test_modules/test_sucos.py index ce9679e..5b2998e 100644 --- a/tests/test_modules/test_sucos.py +++ b/tests/test_modules/test_sucos.py @@ -1,11 +1,12 @@ from __future__ import annotations import pytest +from rdkit.Chem.rdmolops import AddHs, RemoveHs from posebusters.modules.sucos import check_sucos -def test_check_sucos(mol_rq3_x00, mol_rq3_x01, mol_rq3_x10): +def test_check_sucos(mol_rq3_x00, mol_rq3_x01, mol_rq3_x10, mol_small_7brv_f5r_7wb6_f5r, mol_large_7brv_f5r_7wb6_f5r): out = check_sucos(mol_rq3_x00, mol_rq3_x00) assert out["results"]["sucos"] == pytest.approx(1.0, abs=1e-6) @@ -16,6 +17,22 @@ def test_check_sucos(mol_rq3_x00, mol_rq3_x01, mol_rq3_x10): assert out["results"]["sucos"] == pytest.approx(0.0, abs=1e-6) +def test_check_sucos_hydrogens(mol_small_7brv_f5r_7wb6_f5r, mol_large_7brv_f5r_7wb6_f5r): + out = check_sucos( + AddHs(mol_large_7brv_f5r_7wb6_f5r, addCoords=True), AddHs(mol_small_7brv_f5r_7wb6_f5r, addCoords=True) + ) + assert out["results"]["sucos"] <= 1.0 + + out = check_sucos(RemoveHs(mol_large_7brv_f5r_7wb6_f5r), RemoveHs(mol_small_7brv_f5r_7wb6_f5r)) + assert out["results"]["sucos"] <= 1.0 + + out = check_sucos(AddHs(mol_large_7brv_f5r_7wb6_f5r, addCoords=True), RemoveHs(mol_small_7brv_f5r_7wb6_f5r)) + assert out["results"]["sucos"] <= 1.0 + + out = check_sucos(RemoveHs(mol_small_7brv_f5r_7wb6_f5r), AddHs(mol_large_7brv_f5r_7wb6_f5r, addCoords=True)) + assert out["results"]["sucos"] <= 1.0 + + def test_check_sucos_multiple_ground_truth(mol_one_true_1w1p, mol_true_1w1p): out = check_sucos(mol_one_true_1w1p, mol_true_1w1p) assert out["results"]["sucos"] == pytest.approx(1.0, abs=1e-6) diff --git a/tests/test_posebusters.py b/tests/test_posebusters.py index 9de4462..5c9125e 100644 --- a/tests/test_posebusters.py +++ b/tests/test_posebusters.py @@ -1,6 +1,6 @@ import math -from rdkit.Chem.rdmolfiles import MolFromSmiles +from rdkit.Chem.rdmolfiles import MolFromMolFile, MolFromPDBFile, MolFromSmiles from posebusters import PoseBusters @@ -107,3 +107,16 @@ def test_bust_gen() -> None: posebusters = PoseBusters("gen") result = posebusters.bust(mol_pred=mol_larger, mol_true=mol_smaller, mol_cond=mol_cond_smaller, full_report=True) assert result["sucos"][0] > 0.2 + + +def test_bust_loaded_mols() -> None: + posebuster = PoseBusters("redock") + + mol_pred = MolFromMolFile(mol_pred_1ia1) + mol_true = MolFromMolFile(mol_true_1ia1) + mol_cond = MolFromPDBFile(mol_cond_1ia1) + + df = posebuster.bust([mol_pred], mol_true, mol_cond) + assert df["mol_pred_loaded"].all() + assert df["mol_true_loaded"].all() + assert df["mol_cond_loaded"].all()