Skip to content

Commit

Permalink
Fix passing mol objects to bust bug (#40)
Browse files Browse the repository at this point in the history
API:
- Fix bug identified by Guy Durant: passing molecule objects into bust failed

SuCOS module:
- Update documentation and add reference
  • Loading branch information
maabuu authored Jun 10, 2024
1 parent 59427c1 commit 77433ea
Show file tree
Hide file tree
Showing 10 changed files with 156 additions and 17 deletions.
19 changes: 19 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -249,5 +249,24 @@
"long",
]
},
{
"name": "bust -- memory problem",
"type": "debugpy",
"request": "launch",
"cwd": "${workspaceFolder}",
"module": "posebusters",
"justMyCode": true,
"stopOnEntry": false,
"args": [
"tests/conftest/1W1P_GIO/1W1P_GIO_ligand.sdf",
"-l",
"tests/conftest/1W1P_GIO/1W1P_GIO_ligands.sdf",
"-p",
"tests/conftest/1W1P_GIO/1W1P_GIO_protein.pdb",
"--full-report",
"--outfmt",
"long",
]
},
]
}
2 changes: 1 addition & 1 deletion posebusters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@
"check_volume_overlap",
]

__version__ = "0.2.13"
__version__ = "0.2.14"
37 changes: 25 additions & 12 deletions posebusters/modules/sucos.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Module to calculate SuCOS score."""

from __future__ import annotations

import os
Expand All @@ -8,7 +9,7 @@
from rdkit.Chem.FeatMaps import FeatMaps
from rdkit.Chem.rdchem import Mol
from rdkit.Chem.rdMolChemicalFeatures import BuildFeatureFactory
from rdkit.Chem.rdmolops import SanitizeMol
from rdkit.Chem.rdmolops import AddHs, RemoveHs, SanitizeMol
from rdkit.Chem.rdShapeHelpers import ShapeProtrudeDist

FACTORY = BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, "BaseFeatures.fdef"))
Expand All @@ -30,8 +31,7 @@ def get_feature_map_score(
mol_large: Mol,
conf_id_small: int = -1,
conf_id_large: int = -1,
score_mode=FeatMaps.FeatMapScoreMode.Best,
):
) -> float:
"""Calculate the feature map score between two molecules.
Good introduction:
Expand All @@ -44,7 +44,7 @@ def get_feature_map_score(

# feature map based on small molecule
feature_map = FeatMaps.FeatMap(feats=features_small, weights=[1] * len(features_small), params=PARAMETERS)
feature_map.scoreMode = score_mode
feature_map.scoreMode = FeatMaps.FeatMapScoreMode.Best

# score feature in large molecule present in small molecule
feature_map_score = feature_map.ScoreFeats(features_large) / min(feature_map.GetNumFeatures(), len(features_large))
Expand All @@ -55,9 +55,9 @@ def get_feature_map_score(
def get_sucos_score(
mol_reference: Mol,
mol_probe: Mol,
score_mode=FeatMaps.FeatMapScoreMode.Best,
conf_id_reference: int = -1,
conf_id_probe: int = -1,
heavy_only: bool | None = True,
) -> float:
"""Calculate the SuCOS score between a reference ligand and a list of probe ligands.
Expand All @@ -69,15 +69,19 @@ def get_sucos_score(
SuCOS score.
Notes:
SuCOS described in https://chemrxiv.org/engage/chemrxiv/article-details/60c741a99abda23230f8bed5
Adapted from https://github.com/MarcMoesser/SuCOS/blob/master/calc_SuCOS_normalized.py
"""

# explicit or implicit hydrogens should be same for both molecules
mol_reference = handle_hydrogens(mol_reference, heavy_only=heavy_only)
mol_probe = handle_hydrogens(mol_probe, heavy_only=heavy_only)

feature_map_score = get_feature_map_score(
mol_small=mol_reference,
mol_large=mol_probe,
conf_id_small=conf_id_reference,
conf_id_large=conf_id_probe,
score_mode=score_mode,
)
protrusion_distance = ShapeProtrudeDist(
mol1=mol_reference,
Expand All @@ -94,23 +98,23 @@ def get_sucos_score(


def check_sucos(mol_pred: Mol, mol_true: Mol, sucos_threshold: float = 0.4) -> dict[str, dict[str, bool | float]]:
"""Calculate RMSD and related metrics between predicted molecule and closest ground truth molecule.
"""Calculate SuCOS and related metrics between predicted molecule and closest ground truth molecule.
Args:
mol_pred: Predicted molecule (docked ligand) with exactly one conformer.
mol_true: Ground truth molecule (crystal ligand) with at least one conformer. If multiple conformers are
present, the lowest RMSD will be reported.
rmsd_threshold: Threshold in angstrom for reporting whether RMSD is within threshold. Defaults to 2.0.
heavy_only: Whether to only consider heavy atoms for RMSD calculation. Defaults to True.
present, the highest SuCOS will be reported.
sucos_threshold: Threshold in angstrom for reporting whether SuCOS is within threshold. Defaults to 0.4.
heavy_only: Whether to only consider heavy atoms for SuCOS calculation. Defaults to True.
Returns:
PoseBusters results dictionary.
"""
assert isinstance(mol_true, Mol), "Ground truth molecule is missing."
assert isinstance(mol_pred, Mol), "Predicted molecule is missing."
num_conf = mol_true.GetNumConformers()
assert num_conf > 0, "Crystal ligand needs at least one conformer."
assert mol_pred.GetNumConformers() == 1, "Docked ligand should only have one conformer."
assert num_conf > 0, "Ground truth molecule needs at least one conformer."
assert mol_pred.GetNumConformers() == 1, "Predicted molecule should only have one conformer."

try:
SanitizeMol(mol_pred)
Expand All @@ -125,3 +129,12 @@ def check_sucos(mol_pred: Mol, mol_true: Mol, sucos_threshold: float = 0.4) -> d

results = {"sucos": best_sucos, "sucos_within_threshold": sucos_within_threshold}
return {"results": results}


def handle_hydrogens(mol: Mol, heavy_only: bool | None = True) -> Mol:
"""Remove, add or do not modify hydrogens in a molecule."""
if heavy_only is None:
return mol
if heavy_only:
return RemoveHs(mol)
return AddHs(mol, addCoords=True)
4 changes: 2 additions & 2 deletions posebusters/posebusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,10 @@ def bust(
Returns:
Pandas dataframe with results.
"""
mol_pred = [mol_pred] if isinstance(mol_pred, (Mol, Path, str)) else mol_pred
mol_pred_list: Iterable[Mol | Path | str] = [mol_pred] if isinstance(mol_pred, (Mol, Path, str)) else mol_pred

columns = ["mol_pred", "mol_true", "mol_cond"]
self.file_paths = pd.DataFrame([[mol_pred, mol_true, mol_cond] for mol_pred in mol_pred], columns=columns)
self.file_paths = pd.DataFrame([[mol_pred, mol_true, mol_cond] for mol_pred in mol_pred_list], columns=columns)

results_gen = self._run()

Expand Down
3 changes: 3 additions & 0 deletions posebusters/tools/loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ def safe_load_mol(path: Path, load_all: bool = False, **load_params) -> Mol | No
Returns:
Molecule object or None if loading failed.
"""
if isinstance(path, Mol):
return path

try:
path = _check_path(path)
with CaptureLogger():
Expand Down
10 changes: 10 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,13 @@ def mol_cond_7ztl_bcn():
@pytest.fixture
def mol_disconnnected_atoms():
return MolFromMolFile("tests/conftest/mol_disconnected_atoms.sdf", sanitize=True)


@pytest.fixture
def mol_small_7brv_f5r_7wb6_f5r():
return MolFromMolFile("tests/conftest/7BRV_F5R_7WB6_F5R/7BRV_F5R_7WB6_F5R_smaller_ligand.sdf", removeHs=True)


@pytest.fixture
def mol_large_7brv_f5r_7wb6_f5r():
return MolFromMolFile("tests/conftest/7BRV_F5R_7WB6_F5R/7BRV_F5R_7WB6_F5R_larger_ligand.sdf", removeHs=True)
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
right_molecule_with_hydrogens
RDKit 3D

17 17 0 0 0 0 0 0 0 0999 V2000
57.8140 43.3950 19.4830 N 0 0 0 0 0 0 0 0 0 0 0 0
55.9570 43.8700 15.0390 C 0 0 0 0 0 0 0 0 0 0 0 0
56.2270 43.1270 16.1730 C 0 0 0 0 0 0 0 0 0 0 0 0
56.8350 43.7120 17.2820 C 0 0 0 0 0 0 0 0 0 0 0 0
57.1250 42.9160 18.5000 C 0 0 0 0 0 0 0 0 0 0 0 0
57.1620 45.0630 17.2290 C 0 0 0 0 0 0 0 0 0 0 0 0
56.8990 45.8100 16.0990 C 0 0 0 0 0 0 0 0 0 0 0 0
56.3030 45.2030 15.0180 C 0 0 0 0 0 0 0 0 0 0 0 0
56.6450 41.6610 18.5400 N 0 0 0 0 0 0 0 0 0 0 0 0
55.9500 46.2400 13.4670 Br 0 0 0 0 0 0 0 0 0 0 0 0
58.1060 44.1550 19.3610 H 0 0 0 0 0 0 0 0 0 0 0 0
55.5430 43.4720 14.2930 H 0 0 0 0 0 0 0 0 0 0 0 0
55.9960 42.2150 16.1950 H 0 0 0 0 0 0 0 0 0 0 0 0
57.5680 45.4740 17.9720 H 0 0 0 0 0 0 0 0 0 0 0 0
57.1240 46.7240 16.0670 H 0 0 0 0 0 0 0 0 0 0 0 0
56.1730 41.3460 17.8580 H 0 0 0 0 0 0 0 0 0 0 0 0
56.8010 41.1510 19.2480 H 0 0 0 0 0 0 0 0 0 0 0 0
1 5 2 3
1 11 1 0
2 3 2 0
2 8 1 0
2 12 1 0
3 4 1 0
3 13 1 0
4 5 1 0
4 6 2 0
5 9 1 0
6 7 1 0
6 14 1 0
7 8 2 0
7 15 1 0
8 10 1 0
9 16 1 0
9 17 1 0
M END
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
left_molecule_without_hydrogens
RDKit 3D

10 10 0 0 0 0 0 0 0 0999 V2000
57.7298 43.4513 19.4458 N 0 0 0 0 0 0 0 0 0 0 0 0
56.8728 45.7841 16.0476 C 0 0 0 0 0 0 0 0 0 0 0 0
57.1267 45.0725 17.1867 C 0 0 0 0 0 0 0 0 0 0 0 0
56.8325 43.7291 17.2368 C 0 0 0 0 0 0 0 0 0 0 0 0
57.0936 42.9364 18.4399 C 0 0 0 0 0 0 0 0 0 0 0 0
56.1174 43.1560 16.2069 C 0 0 0 0 0 0 0 0 0 0 0 0
55.8545 43.8496 15.0648 C 0 0 0 0 0 0 0 0 0 0 0 0
56.2497 45.1542 15.0115 C 0 0 0 0 0 0 0 0 0 0 0 0
56.7194 41.6618 18.4761 N 0 0 0 0 0 0 0 0 0 0 0 0
55.9070 46.2021 13.4738 Br 0 0 0 0 0 0 0 0 0 0 0 0
1 5 2 3
2 3 2 0
2 8 1 0
3 4 1 0
4 5 1 0
4 6 2 0
5 9 1 0
6 7 1 0
7 8 2 0
8 10 1 0
M END
19 changes: 18 additions & 1 deletion tests/test_modules/test_sucos.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from __future__ import annotations

import pytest
from rdkit.Chem.rdmolops import AddHs, RemoveHs

from posebusters.modules.sucos import check_sucos


def test_check_sucos(mol_rq3_x00, mol_rq3_x01, mol_rq3_x10):
def test_check_sucos(mol_rq3_x00, mol_rq3_x01, mol_rq3_x10, mol_small_7brv_f5r_7wb6_f5r, mol_large_7brv_f5r_7wb6_f5r):
out = check_sucos(mol_rq3_x00, mol_rq3_x00)
assert out["results"]["sucos"] == pytest.approx(1.0, abs=1e-6)

Expand All @@ -16,6 +17,22 @@ def test_check_sucos(mol_rq3_x00, mol_rq3_x01, mol_rq3_x10):
assert out["results"]["sucos"] == pytest.approx(0.0, abs=1e-6)


def test_check_sucos_hydrogens(mol_small_7brv_f5r_7wb6_f5r, mol_large_7brv_f5r_7wb6_f5r):
out = check_sucos(
AddHs(mol_large_7brv_f5r_7wb6_f5r, addCoords=True), AddHs(mol_small_7brv_f5r_7wb6_f5r, addCoords=True)
)
assert out["results"]["sucos"] <= 1.0

out = check_sucos(RemoveHs(mol_large_7brv_f5r_7wb6_f5r), RemoveHs(mol_small_7brv_f5r_7wb6_f5r))
assert out["results"]["sucos"] <= 1.0

out = check_sucos(AddHs(mol_large_7brv_f5r_7wb6_f5r, addCoords=True), RemoveHs(mol_small_7brv_f5r_7wb6_f5r))
assert out["results"]["sucos"] <= 1.0

out = check_sucos(RemoveHs(mol_small_7brv_f5r_7wb6_f5r), AddHs(mol_large_7brv_f5r_7wb6_f5r, addCoords=True))
assert out["results"]["sucos"] <= 1.0


def test_check_sucos_multiple_ground_truth(mol_one_true_1w1p, mol_true_1w1p):
out = check_sucos(mol_one_true_1w1p, mol_true_1w1p)
assert out["results"]["sucos"] == pytest.approx(1.0, abs=1e-6)
15 changes: 14 additions & 1 deletion tests/test_posebusters.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import math

from rdkit.Chem.rdmolfiles import MolFromSmiles
from rdkit.Chem.rdmolfiles import MolFromMolFile, MolFromPDBFile, MolFromSmiles

from posebusters import PoseBusters

Expand Down Expand Up @@ -107,3 +107,16 @@ def test_bust_gen() -> None:
posebusters = PoseBusters("gen")
result = posebusters.bust(mol_pred=mol_larger, mol_true=mol_smaller, mol_cond=mol_cond_smaller, full_report=True)
assert result["sucos"][0] > 0.2


def test_bust_loaded_mols() -> None:
posebuster = PoseBusters("redock")

mol_pred = MolFromMolFile(mol_pred_1ia1)
mol_true = MolFromMolFile(mol_true_1ia1)
mol_cond = MolFromPDBFile(mol_cond_1ia1)

df = posebuster.bust([mol_pred], mol_true, mol_cond)
assert df["mol_pred_loaded"].all()
assert df["mol_true_loaded"].all()
assert df["mol_cond_loaded"].all()

0 comments on commit 77433ea

Please sign in to comment.