Skip to content

Commit

Permalink
Bug fixes for SuCOS score (#42)
Browse files Browse the repository at this point in the history
SuCOS module:
- ensure score not larger than 1
- handle molecules without features - will only use shape overlap
- add references to RDKit blog and papers for feature map-based similarity metrics
  • Loading branch information
maabuu authored Jul 10, 2024
1 parent 77433ea commit 7d9f377
Show file tree
Hide file tree
Showing 12 changed files with 599 additions and 13 deletions.
2 changes: 1 addition & 1 deletion posebusters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@
"check_volume_overlap",
]

__version__ = "0.2.14"
__version__ = "0.2.15"
36 changes: 25 additions & 11 deletions posebusters/modules/sucos.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,22 +34,30 @@ def get_feature_map_score(
) -> float:
"""Calculate the feature map score between two molecules.
Good introduction:
https://greglandrum.github.io/rdkit-blog/posts/2023-02-24-using-feature-maps.html
References:
Putta et al, 2005: https://pubs.acs.org/doi/abs/10.1021/jm049066l
Landrum et al, 2007: https://link.springer.com/article/10.1007/s10822-006-9085-8
Landrum Greg, 2017: https://rdkit.blogspot.com/2017/11/using-feature-maps.html
Landrum Greg, 2023: https://greglandrum.github.io/rdkit-blog/posts/2023-02-24-using-feature-maps.html
"""

# raw features
# list features
features_small = [f for f in FACTORY.GetFeaturesForMol(mol_small, confId=conf_id_small) if f.GetFamily() in KEEP]
features_large = [f for f in FACTORY.GetFeaturesForMol(mol_large, confId=conf_id_large) if f.GetFamily() in KEEP]

# feature map based on small molecule
# create feature map based on small molecule
feature_map = FeatMaps.FeatMap(feats=features_small, weights=[1] * len(features_small), params=PARAMETERS)
feature_map.scoreMode = FeatMaps.FeatMapScoreMode.Best

# score feature in large molecule present in small molecule
feature_map_score = feature_map.ScoreFeats(features_large) / min(feature_map.GetNumFeatures(), len(features_large))
# score features of large molecule present in small molecule
feature_score = feature_map.ScoreFeats(features_large)

return feature_map_score
# normalize score
normalization_constant = min(feature_map.GetNumFeatures(), len(features_large))
if normalization_constant > 0:
return feature_score / normalization_constant

return np.nan


def get_sucos_score(
Expand All @@ -68,9 +76,9 @@ def get_sucos_score(
Returns:
SuCOS score.
Notes:
SuCOS described in https://chemrxiv.org/engage/chemrxiv/article-details/60c741a99abda23230f8bed5
Adapted from https://github.com/MarcMoesser/SuCOS/blob/master/calc_SuCOS_normalized.py
References:
Leung et al, 2019: https://chemrxiv.org/engage/chemrxiv/article-details/60c741a99abda23230f8bed5
Moesser, Marc: https://github.com/MarcMoesser/SuCOS/blob/master/calc_SuCOS_normalized.py
"""

# explicit or implicit hydrogens should be same for both molecules
Expand All @@ -92,7 +100,13 @@ def get_sucos_score(
vdwScale=0.8,
ignoreHs=True,
)
sucos_score = float(0.5 * feature_map_score + 0.5 * (1 - protrusion_distance))
shape_overlap = max(1 - protrusion_distance, 0)

# if no features, base on shape alone
if not np.isnan(feature_map_score):
sucos_score = 0.5 * feature_map_score + 0.5 * shape_overlap
else:
sucos_score = shape_overlap

return sucos_score

Expand Down
30 changes: 30 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,3 +195,33 @@ def mol_small_7brv_f5r_7wb6_f5r():
@pytest.fixture
def mol_large_7brv_f5r_7wb6_f5r():
    """Provide the molecule read from the "larger ligand" 7BRV/7WB6 F5R SDF file.

    The file is read with ``removeHs=True``. The path is relative, so it is
    resolved against the directory the tests are started from.
    """
    sdf_path = "tests/conftest/7BRV_F5R_7WB6_F5R/7BRV_F5R_7WB6_F5R_larger_ligand.sdf"
    return MolFromMolFile(sdf_path, removeHs=True)


@pytest.fixture
def mol_065():
    """Provide the molecule read from ``mol_065_ideal.sdf``.

    The path is relative, so it is resolved against the directory the tests
    are started from.
    """
    sdf_path = "tests/conftest/mol_065_ideal.sdf"
    return MolFromMolFile(sdf_path)


@pytest.fixture
def mol_065_left():
    """Provide the molecule read from ``mol_065_left.sdf``.

    The path is relative, so it is resolved against the directory the tests
    are started from.
    """
    sdf_path = "tests/conftest/mol_065_left.sdf"
    return MolFromMolFile(sdf_path)


@pytest.fixture
def mol_065_right():
    """Provide the molecule read from ``mol_065_right.sdf``.

    The path is relative, so it is resolved against the directory the tests
    are started from.
    """
    sdf_path = "tests/conftest/mol_065_right.sdf"
    return MolFromMolFile(sdf_path)


@pytest.fixture
def mol_TMO():
    """Provide the molecule read from ``mol_TMO.sdf``.

    The path is relative, so it is resolved against the directory the tests
    are started from.
    """
    sdf_path = "tests/conftest/mol_TMO.sdf"
    return MolFromMolFile(sdf_path)


@pytest.fixture
def mol_2YU():
    """Provide the molecule read from ``mol_2YU.mol``.

    The path is relative, so it is resolved against the directory the tests
    are started from.
    """
    mol_path = "tests/conftest/mol_2YU.mol"
    return MolFromMolFile(mol_path)


@pytest.fixture
def mol_HQT():
    """Provide the molecule read from ``mol_HQT.mol``.

    The path is relative, so it is resolved against the directory the tests
    are started from.
    """
    mol_path = "tests/conftest/mol_HQT.mol"
    return MolFromMolFile(mol_path)
174 changes: 174 additions & 0 deletions tests/conftest/mol_065_ideal.sdf
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
065
-OEChem-05312419063D

76 80 0 1 0 0 0 0 0999 V2000
-2.8150 4.1190 0.2950 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.1720 5.3090 0.4820 O 0 0 0 0 0 0 0 0 0 0 0 0
-1.2910 5.4520 -0.6470 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.0170 4.0940 -1.0410 O 0 0 0 0 0 0 0 0 0 0 0 0
-2.1080 3.3750 -0.6460 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.5630 2.1200 -1.0160 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.7170 1.6090 -0.4510 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.4190 2.3480 0.4820 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.9700 3.6010 0.8570 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.2950 0.0130 -0.9240 S 0 0 0 0 0 0 0 0 0 0 0 0
-5.6890 -0.0070 -0.6490 O 0 0 0 0 0 0 0 0 0 0 0 0
-3.7530 -0.2440 -2.2120 O 0 0 0 0 0 0 0 0 0 0 0 0
-3.6000 -1.0840 0.1030 N 0 0 3 0 0 0 0 0 0 0 0 0
-4.2760 -1.4490 1.3510 C 0 0 0 0 0 0 0 0 0 0 0 0
-5.0960 -2.7210 1.1340 C 0 0 0 0 0 0 0 0 0 0 0 0
-5.7010 -3.1720 2.4650 C 0 0 0 0 0 0 0 0 0 0 0 0
-6.2190 -2.4400 0.1330 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.3100 -1.6900 -0.2360 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.1840 -0.9000 0.4330 C 0 0 1 0 0 0 0 0 0 0 0 0
-1.2960 -1.0190 1.8530 O 0 0 0 0 0 0 0 0 0 0 0 0
0.1680 -1.4550 -0.0200 C 0 0 1 0 0 0 0 0 0 0 0 0
1.2440 -0.6240 0.5250 N 0 0 0 0 0 0 0 0 0 0 0 0
2.4390 -0.5760 -0.0970 C 0 0 0 0 0 0 0 0 0 0 0 0
2.6230 -1.2210 -1.1100 O 0 0 0 0 0 0 0 0 0 0 0 0
3.4280 0.1880 0.4040 O 0 0 0 0 0 0 0 0 0 0 0 0
4.6880 0.1920 -0.3190 C 0 0 1 0 0 0 0 0 0 0 0 0
4.7090 1.3310 -1.3540 C 0 0 0 0 0 0 0 0 0 0 0 0
5.7370 2.2500 -0.9600 O 0 0 0 0 0 0 0 0 0 0 0 0
6.7100 1.5220 -0.1860 C 0 0 1 0 0 0 0 0 0 0 0 0
7.2830 2.4070 0.7860 O 0 0 0 0 0 0 0 0 0 0 0 0
6.5010 2.4050 1.9850 C 0 0 0 0 0 0 0 0 0 0 0 0
5.3340 1.4170 1.7800 C 0 0 0 0 0 0 0 0 0 0 0 0
5.8550 0.5100 0.6380 C 0 0 1 0 0 0 0 0 0 0 0 0
0.3250 -2.8900 0.4860 C 0 0 0 0 0 0 0 0 0 0 0 0
2.7810 -3.3470 0.6430 C 0 0 0 0 0 0 0 0 0 0 0 0
3.9500 -3.8870 0.1400 C 0 0 0 0 0 0 0 0 0 0 0 0
3.9370 -4.5590 -1.0680 C 0 0 0 0 0 0 0 0 0 0 0 0
2.7550 -4.6920 -1.7720 C 0 0 0 0 0 0 0 0 0 0 0 0
1.5860 -4.1520 -1.2700 C 0 0 0 0 0 0 0 0 0 0 0 0
1.5990 -3.4790 -0.0620 C 0 0 0 0 0 0 0 0 0 0 0 0
7.4640 1.0400 -0.8090 H 0 0 0 0 0 0 0 0 0 0 0 0
6.4040 -0.3710 0.9720 H 0 0 0 0 0 0 0 0 0 0 0 0
4.8470 -0.7690 -0.8080 H 0 0 0 0 0 0 0 0 0 0 0 0
-6.3460 -2.3860 2.8560 H 0 0 0 0 0 0 0 0 0 0 0 0
-6.2860 -4.0790 2.3100 H 0 0 0 0 0 0 0 0 0 0 0 0
-4.9010 -3.3730 3.1780 H 0 0 0 0 0 0 0 0 0 0 0 0
-4.4510 -3.5080 0.7430 H 0 0 0 0 0 0 0 0 0 0 0 0
-6.8650 -1.6530 0.5240 H 0 0 0 0 0 0 0 0 0 0 0 0
-5.7890 -2.1180 -0.8150 H 0 0 0 0 0 0 0 0 0 0 0 0
-6.8040 -3.3460 -0.0220 H 0 0 0 0 0 0 0 0 0 0 0 0
-3.5320 -1.6240 2.1290 H 0 0 0 0 0 0 0 0 0 0 0 0
-4.9370 -0.6380 1.6570 H 0 0 0 0 0 0 0 0 0 0 0 0
-2.2890 -2.7210 0.1170 H 0 0 0 0 0 0 0 0 0 0 0 0
-2.1740 -1.6730 -1.3170 H 0 0 0 0 0 0 0 0 0 0 0 0
-1.2590 0.1500 0.1500 H 0 0 0 0 0 0 0 0 0 0 0 0
-1.2380 -1.9280 2.1770 H 0 0 0 0 0 0 0 0 0 0 0 0
0.2180 -1.4460 -1.1090 H 0 0 0 0 0 0 0 0 0 0 0 0
0.3640 -2.8890 1.5750 H 0 0 0 0 0 0 0 0 0 0 0 0
-0.5240 -3.4880 0.1530 H 0 0 0 0 0 0 0 0 0 0 0 0
2.7900 -2.8260 1.5890 H 0 0 0 0 0 0 0 0 0 0 0 0
4.8730 -3.7830 0.6910 H 0 0 0 0 0 0 0 0 0 0 0 0
4.8510 -4.9810 -1.4610 H 0 0 0 0 0 0 0 0 0 0 0 0
2.7450 -5.2160 -2.7160 H 0 0 0 0 0 0 0 0 0 0 0 0
0.6630 -4.2550 -1.8210 H 0 0 0 0 0 0 0 0 0 0 0 0
1.0980 -0.1090 1.3340 H 0 0 0 0 0 0 0 0 0 0 0 0
4.9300 0.9290 -2.3420 H 0 0 0 0 0 0 0 0 0 0 0 0
3.7440 1.8380 -1.3650 H 0 0 0 0 0 0 0 0 0 0 0 0
7.1150 2.0820 2.8260 H 0 0 0 0 0 0 0 0 0 0 0 0
6.1110 3.4050 2.1740 H 0 0 0 0 0 0 0 0 0 0 0 0
5.1550 0.8360 2.6850 H 0 0 0 0 0 0 0 0 0 0 0 0
4.4310 1.9440 1.4730 H 0 0 0 0 0 0 0 0 0 0 0 0
-5.3200 1.9450 0.9210 H 0 0 0 0 0 0 0 0 0 0 0 0
-4.5230 4.1780 1.5830 H 0 0 0 0 0 0 0 0 0 0 0 0
-1.7860 5.9920 -1.4540 H 0 0 0 0 0 0 0 0 0 0 0 0
-0.3710 5.9580 -0.3530 H 0 0 0 0 0 0 0 0 0 0 0 0
-2.0170 1.5410 -1.7450 H 0 0 0 0 0 0 0 0 0 0 0 0
1 9 1 0 0 0 0
1 2 1 0 0 0 0
1 5 2 0 0 0 0
2 3 1 0 0 0 0
3 4 1 0 0 0 0
3 74 1 0 0 0 0
3 75 1 0 0 0 0
4 5 1 0 0 0 0
5 6 1 0 0 0 0
6 7 2 0 0 0 0
6 76 1 0 0 0 0
7 10 1 0 0 0 0
7 8 1 0 0 0 0
8 9 2 0 0 0 0
8 72 1 0 0 0 0
9 73 1 0 0 0 0
10 13 1 0 0 0 0
10 11 2 0 0 0 0
10 12 2 0 0 0 0
13 14 1 0 0 0 0
13 18 1 0 0 0 0
14 15 1 0 0 0 0
14 51 1 0 0 0 0
14 52 1 0 0 0 0
15 16 1 0 0 0 0
15 17 1 0 0 0 0
15 47 1 0 0 0 0
16 44 1 0 0 0 0
16 45 1 0 0 0 0
16 46 1 0 0 0 0
17 48 1 0 0 0 0
17 49 1 0 0 0 0
17 50 1 0 0 0 0
18 19 1 0 0 0 0
18 53 1 0 0 0 0
18 54 1 0 0 0 0
19 20 1 0 0 0 0
19 21 1 0 0 0 0
19 55 1 0 0 0 0
20 56 1 0 0 0 0
21 34 1 0 0 0 0
21 22 1 0 0 0 0
21 57 1 0 0 0 0
22 23 1 0 0 0 0
22 65 1 0 0 0 0
23 24 2 0 0 0 0
23 25 1 0 0 0 0
25 26 1 0 0 0 0
26 27 1 0 0 0 0
26 33 1 0 0 0 0
26 43 1 0 0 0 0
27 28 1 0 0 0 0
27 66 1 0 0 0 0
27 67 1 0 0 0 0
28 29 1 0 0 0 0
29 30 1 0 0 0 0
29 33 1 0 0 0 0
29 41 1 0 0 0 0
30 31 1 0 0 0 0
31 32 1 0 0 0 0
31 68 1 0 0 0 0
31 69 1 0 0 0 0
32 33 1 0 0 0 0
32 70 1 0 0 0 0
32 71 1 0 0 0 0
33 42 1 0 0 0 0
34 40 1 0 0 0 0
34 58 1 0 0 0 0
34 59 1 0 0 0 0
35 40 1 0 0 0 0
35 36 2 0 0 0 0
35 60 1 0 0 0 0
36 37 1 0 0 0 0
36 61 1 0 0 0 0
37 38 2 0 0 0 0
37 62 1 0 0 0 0
38 39 1 0 0 0 0
38 63 1 0 0 0 0
39 40 2 0 0 0 0
39 64 1 0 0 0 0
M END
> <OPENEYE_ISO_SMILES>
CC(C)CN(C[C@H]([C@H](Cc1ccccc1)NC(=O)O[C@H]2CO[C@@H]3[C@H]2CCO3)O)S(=O)(=O)c4ccc5c(c4)OCO5

> <OPENEYE_INCHI>
InChI=1S/C28H36N2O9S/c1-18(2)14-30(40(33,34)20-8-9-24-25(13-20)38-17-37-24)15-23(31)22(12-19-6-4-3-5-7-19)29-28(32)39-26-16-36-27-21(26)10-11-35-27/h3-9,13,18,21-23,26-27,31H,10-12,14-17H2,1-2H3,(H,29,32)/t21-,22-,23+,26-,27+/m0/s1

> <OPENEYE_INCHIKEY>
HNEIRZJZTYYDES-VPZBFSRCSA-N

> <FORMULA>
C28H36N2O9S

$$$$
90 changes: 90 additions & 0 deletions tests/conftest/mol_065_left.sdf
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@

RDKit 3D

40 44 0 0 0 0 0 0 0 0999 V2000
15.7990 34.9010 17.7930 C 0 0 0 0 0 0 0 0 0 0 0 0
15.2650 35.8150 18.6450 O 0 0 0 0 0 0 0 0 0 0 0 0
15.8940 37.0150 18.3890 C 0 0 0 0 0 0 0 0 0 0 0 0
16.7990 36.8680 17.3670 O 0 0 0 0 0 0 0 0 0 0 0 0
16.7810 35.5430 17.0400 C 0 0 0 0 0 0 0 0 0 0 0 0
17.5590 34.8700 16.1090 C 0 0 0 0 0 0 0 0 0 0 0 0
17.2820 33.5300 15.9640 C 0 0 0 0 0 0 0 0 0 0 0 0
16.3260 32.9060 16.7240 C 0 0 0 0 0 0 0 0 0 0 0 0
15.4960 33.5800 17.6090 C 0 0 0 0 0 0 0 0 0 0 0 0
18.3310 32.5530 14.9410 S 0 0 0 0 0 0 0 0 0 0 0 0
17.5450 31.4520 14.4340 O 0 0 0 0 0 0 0 0 0 0 0 0
18.9800 33.4600 14.0140 O 0 0 0 0 0 0 0 0 0 0 0 0
19.5450 31.9210 15.8580 N 0 0 0 0 0 0 0 0 0 0 0 0
20.4050 32.8770 16.5370 C 0 0 0 0 0 0 0 0 0 0 0 0
21.8880 32.5200 16.5770 C 0 0 0 0 0 0 0 0 0 0 0 0
22.4580 32.4660 15.1510 C 0 0 0 0 0 0 0 0 0 0 0 0
22.6430 33.5450 17.4220 C 0 0 0 0 0 0 0 0 0 0 0 0
19.2780 30.6780 16.5480 C 0 0 0 0 0 0 0 0 0 0 0 0
20.1130 29.5170 15.9900 C 0 0 0 0 0 0 0 0 0 0 0 0
20.0080 28.4050 16.9070 O 0 0 0 0 0 0 0 0 0 0 0 0
19.7050 29.0580 14.6090 C 0 0 0 0 0 0 0 0 0 0 0 0
20.6950 28.1640 14.0840 N 0 0 0 0 0 0 0 0 0 0 0 0
21.5680 28.4800 13.1080 C 0 0 0 0 0 0 0 0 0 0 0 0
21.6490 29.5530 12.5210 O 0 0 0 0 0 0 0 0 0 0 0 0
22.3930 27.4380 12.8720 O 0 0 0 0 0 0 0 0 0 0 0 0
23.2150 27.5130 11.6860 C 0 0 0 0 0 0 0 0 0 0 0 0
24.5420 26.8680 12.0230 C 0 0 0 0 0 0 0 0 0 0 0 0
24.2740 25.4600 11.9680 O 0 0 0 0 0 0 0 0 0 0 0 0
23.3070 25.2350 10.9570 C 0 0 0 0 0 0 0 0 0 0 0 0
22.2570 24.4470 11.5030 O 0 0 0 0 0 0 0 0 0 0 0 0
21.0310 25.1440 11.5490 C 0 0 0 0 0 0 0 0 0 0 0 0
21.2000 26.3740 10.6240 C 0 0 0 0 0 0 0 0 0 0 0 0
22.6950 26.5770 10.5860 C 0 0 0 0 0 0 0 0 0 0 0 0
18.3500 28.3800 14.5140 C 0 0 0 0 0 0 0 0 0 0 0 0
17.8930 26.5890 12.7950 C 0 0 0 0 0 0 0 0 0 0 0 0
17.6510 26.2220 11.4790 C 0 0 0 0 0 0 0 0 0 0 0 0
17.4620 27.1500 10.4950 C 0 0 0 0 0 0 0 0 0 0 0 0
17.6060 28.5090 10.7860 C 0 0 0 0 0 0 0 0 0 0 0 0
17.8720 28.8930 12.1120 C 0 0 0 0 0 0 0 0 0 0 0 0
18.0130 27.9390 13.1060 C 0 0 0 0 0 0 0 0 0 0 0 0
1 2 1 0
2 3 1 0
3 4 1 0
1 5 2 0
4 5 1 0
5 6 1 0
6 7 2 0
7 8 1 0
1 9 1 0
8 9 2 0
7 10 1 0
10 11 2 0
10 12 2 0
10 13 1 0
13 14 1 0
14 15 1 0
15 16 1 0
15 17 1 0
13 18 1 0
18 19 1 0
19 20 1 0
19 21 1 0
21 22 1 0
22 23 1 0
23 24 2 0
23 25 1 0
25 26 1 0
26 27 1 0
27 28 1 0
28 29 1 0
29 30 1 0
30 31 1 0
31 32 1 0
26 33 1 0
29 33 1 0
32 33 1 0
21 34 1 0
35 36 2 0
36 37 1 0
37 38 2 0
38 39 1 0
34 40 1 0
35 40 1 0
39 40 2 0
M END
$$$$
Loading

0 comments on commit 7d9f377

Please sign in to comment.