changes

vsag96 · Nov 21, 2018 · c294a6e · c294a6e
1 parent 2a72d8e
commit c294a6e
Show file tree

Hide file tree

Showing 9 changed files with 96 additions and 90 deletions.
diff --git a/contrib/visualization/utils.py b/contrib/visualization/utils.py
@@ -0,0 +1,72 @@
+import nglview
+import tempfile
+import os
+import mdtraj as md
+import numpy as np
+import tempfile
+from rdkit import Chem
+from rdkit.Chem import Draw
+from itertools import islice
+from IPython.display import Image, HTML, display
+
+def combine_mdtraj(protein, ligand):
+  chain = protein.topology.add_chain()
+  residue = protein.topology.add_residue("LIG", chain, resSeq=1)
+  for atom in ligand.topology.atoms:
+      protein.topology.add_atom(atom.name, atom.element, residue)
+  protein.xyz = np.hstack([protein.xyz, ligand.xyz])
+  protein.topology.create_standard_bonds()
+  return protein
+
+def visualize_complex(complex_mdtraj):
+  ligand_atoms = [a.index for a in complex_mdtraj.topology.atoms if "LIG" in str(a.residue)]
+  binding_pocket_atoms = md.compute_neighbors(complex_mdtraj, 0.5, ligand_atoms)[0]
+  binding_pocket_residues = list(set([complex_mdtraj.topology.atom(a).residue.resSeq for a in binding_pocket_atoms]))
+  binding_pocket_residues = [str(r) for r in binding_pocket_residues]
+  binding_pocket_residues = " or ".join(binding_pocket_residues)
+
+  traj = nglview.MDTrajTrajectory( complex_mdtraj ) # wrap the in-memory mdtraj trajectory for nglview
+  ngltraj = nglview.NGLWidget( traj )
+  ngltraj.representations = [
+  { "type": "cartoon", "params": {
+  "sele": "protein", "color": "residueindex"
+  } },
+  { "type": "licorice", "params": {
+  "sele": "(not hydrogen) and (%s)" %  binding_pocket_residues
+  } },
+  { "type": "ball+stick", "params": {
+  "sele": "LIG"
+  } }
+  ]
+  return ngltraj
+
+def visualize_ligand(ligand_mdtraj):
+  traj = nglview.MDTrajTrajectory( ligand_mdtraj ) # wrap the in-memory mdtraj trajectory for nglview
+  ngltraj = nglview.NGLWidget( traj )
+  ngltraj.representations = [
+    { "type": "ball+stick", "params": {"sele": "all" } } ]
+  return ngltraj
+
+def convert_lines_to_mdtraj(molecule_lines):
+  tempdir = tempfile.mkdtemp()
+  molecule_file = os.path.join(tempdir, "molecule.pdb")
+  with open(molecule_file, "wb") as f:
+    f.writelines(molecule_lines)
+  molecule_mdtraj = md.load(molecule_file)
+  return molecule_mdtraj
+
+def display_images(filenames):
+    """Helper to pretty-print images."""
+    imagesList=''.join(
+        ["<img style='width: 140px; margin: 0px; float: left; border: 1px solid black;' src='%s' />"
+         % str(s) for s in sorted(filenames)])
+    display(HTML(imagesList))
+
+def mols_to_pngs(mols, basename="test"):
+    """Helper to write RDKit mols to png files."""
+    filenames = []
+    for i, mol in enumerate(mols):
+        filename = "%s%d.png" % (basename, i)
+        Draw.MolToFile(mol, filename)
+        filenames.append(filename)
+    return filenames
diff --git a/deepchem/feat/one_hot.py b/deepchem/feat/one_hot.py
@@ -1,6 +1,5 @@
 import numpy as np
 from deepchem.feat import Featurizer
-from rdkit import Chem
 
 zinc_charset = [
     ' ', '#', ')', '(', '+', '-', '/', '1', '3', '2', '5', '4', '7', '6', '8',
@@ -42,6 +41,7 @@ def featurize(self, mols, verbose=True, log_every_n=1000):
     obj
       numpy array of features
     """
+    from rdkit import Chem
     smiles = [Chem.MolToSmiles(mol) for mol in mols]
     if self.charset is None:
       self.charset = self._create_charset(smiles)

diff --git a/deepchem/splits/tests/test_splitter.py b/deepchem/splits/tests/test_splitter.py
@@ -4,8 +4,6 @@
 from __future__ import division
 from __future__ import unicode_literals
 
-from rdkit.Chem.Fingerprints import FingerprintMols
-
 __author__ = "Bharath Ramsundar, Aneesh Pappu"
 __copyright__ = "Copyright 2016, Stanford University"
 __license__ = "MIT"
@@ -16,7 +14,6 @@
 import deepchem as dc
 from deepchem.data import NumpyDataset
 from deepchem.splits import IndexSplitter
-from rdkit import Chem, DataStructs
 
 
 class TestSplitters(unittest.TestCase):

diff --git a/deepchem/utils/__init__.py b/deepchem/utils/__init__.py
@@ -15,8 +15,6 @@
 import tarfile
 import zipfile
 
-from rdkit import Chem
-from rdkit.Chem.Scaffolds import MurckoScaffold
 
 try:
   from urllib.request import urlretrieve  # Python 3
@@ -152,5 +150,6 @@ def get_scaffold(self, mol):
     mols : array_like
         Molecules.
     """
+    from rdkit.Chem.Scaffolds import MurckoScaffold
     return MurckoScaffold.MurckoScaffoldSmiles(
         mol=mol, includeChirality=self.include_chirality)
diff --git a/deepchem/utils/conformers.py b/deepchem/utils/conformers.py
@@ -8,10 +8,6 @@
 
 import numpy as np
 
-from rdkit import Chem
-from rdkit.Chem import AllChem
-
-
 class ConformerGenerator(object):
   """
   Generate molecule conformers.
@@ -106,6 +102,8 @@ def embed_molecule(self, mol):
     mol : RDKit Mol
         Molecule.
     """
+    from rdkit import Chem
+    from rdkit.Chem import AllChem
     mol = Chem.AddHs(mol)  # add hydrogens
     n_confs = self.max_conformers * self.pool_multiplier
     AllChem.EmbedMultipleConfs(mol, numConfs=n_confs, pruneRmsThresh=-1.)
@@ -124,6 +122,7 @@ def get_molecule_force_field(self, mol, conf_id=None, **kwargs):
     kwargs : dict, optional
         Keyword arguments for force field constructor.
     """
+    from rdkit.Chem import AllChem
     if self.force_field == 'uff':
       ff = AllChem.UFFGetMoleculeForceField(
           mol, confId=conf_id, **kwargs)
@@ -218,6 +217,7 @@ def prune_conformers(self, mol):
 
     # create a new molecule to hold the chosen conformers
     # this ensures proper conformer IDs and energy-based ordering
+    from rdkit import Chem
     new = Chem.Mol(mol)
     new.RemoveAllConformers()
     conf_ids = [conf.GetId() for conf in mol.GetConformers()]
@@ -236,6 +236,7 @@ def get_conformer_rmsd(mol):
     mol : RDKit Mol
         Molecule.
     """
+    from rdkit.Chem import AllChem
     rmsd = np.zeros((mol.GetNumConformers(), mol.GetNumConformers()),
                     dtype=float)
     for i, ref_conf in enumerate(mol.GetConformers()):

diff --git a/deepchem/utils/rdkit_util.py b/deepchem/utils/rdkit_util.py
@@ -3,9 +3,6 @@
 import numpy as np
 import os
 
-from rdkit import Chem
-from rdkit.Chem import AllChem
-from rdkit.Chem import rdmolops
 
 try:
   from StringIO import StringIO
@@ -43,6 +40,7 @@ def add_hydrogens_to_mol(mol):
   """
   molecule_file = None
   try:
+    from rdkit import Chem
     pdbblock = Chem.MolToPDBBlock(mol)
     pdb_stringio = StringIO()
     pdb_stringio.write(pdbblock)
@@ -76,9 +74,16 @@ def compute_charges(mol):
   Attempt to compute Gasteiger Charges on Mol
   This also has the side effect of calculating charges on mol.
   The mol passed into this function has to already have been sanitized
-  :param mol: rdkit molecule
-  :return: molecule with charges
+
+  Parameters
+  ----------
+  mol: rdkit molecule
+
+  Returns
+  -------
+  molecule with charges
   """
+  from rdkit.Chem import AllChem
   try:
     AllChem.ComputeGasteigerCharges(mol)
   except Exception as e:
@@ -101,6 +106,7 @@ def load_molecule(molecule_file,
   :param calc_charges: should add charges vis rdkit
   :return: (xyz, mol)
   """
+  from rdkit import Chem
   if ".mol2" in molecule_file:
     my_mol = Chem.MolFromMol2File(molecule_file, sanitize=False, removeHs=False)
   elif ".sdf" in molecule_file:
@@ -173,6 +179,7 @@ def write_molecule(mol, outfile, is_protein=False):
   :param outfile: filename to write mol to
   :param is_protein: is this molecule a protein?
   """
+  from rdkit import Chem
   if ".pdbqt" in outfile:
     writer = Chem.PDBWriter(outfile)
     writer.write(mol)
@@ -208,6 +215,8 @@ def merge_molecules_xyz(protein_xyz, ligand_xyz):
 
 
 def merge_molecules(ligand, protein):
+  """Helper method to merge ligand and protein molecules."""
+  from rdkit.Chem import rdmolops
   return Chem.rdmolops.CombineMols(ligand, protein)
 
 
@@ -370,6 +379,7 @@ def _get_rotatable_bonds(self):
     Taken from rdkit source to find which bonds are rotatable
     store rotatable bonds in (from_atom, to_atom)
     """
+    from rdkit import Chem
     pattern = Chem.MolFromSmarts(
         "[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])("
         "[CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])&!$([#7,O,S!D1]-!@[CD3]="

diff --git a/deepchem/utils/save.py b/deepchem/utils/save.py
@@ -15,7 +15,6 @@
 import numpy as np
 import os
 import deepchem
-from rdkit import Chem
 import warnings
 from deepchem.utils.genomics import encode_bio_sequence as encode_sequence, encode_fasta_sequence as fasta_sequence, seq_one_hot_encode as seq_one_hotencode
 
@@ -72,6 +71,7 @@ def load_data(input_files, shard_size=None, verbose=True):
 
 def load_sdf_files(input_files, clean_mols):
   """Load SDF file into dataframe."""
+  from rdkit import Chem
   dataframes = []
   for input_file in input_files:
     # Tasks are stored in .sdf.csv file

diff --git a/deepchem/utils/test/test_rdkit_util.py b/deepchem/utils/test/test_rdkit_util.py
@@ -9,7 +9,6 @@
 from nose.tools import assert_true
 
 from deepchem.utils import rdkit_util
-from rdkit import Chem
 
 
 class TestRdkitUtil(unittest.TestCase):
@@ -99,6 +98,7 @@ def test_pdbqt_to_pdb(self):
     rdkit_util.write_molecule(mol, out_pdbqt, is_protein=True)
 
     pdb_block = rdkit_util.pdbqt_to_pdb(out_pdbqt)
+    from rdkit import Chem
     pdb_mol = Chem.MolFromPDBBlock(pdb_block, sanitize=False, removeHs=False)
 
     xyz, pdbqt_mol = rdkit_util.load_molecule(

diff --git a/deepchem/utils/visualization.py b/deepchem/utils/visualization.py
@@ -1,73 +0,0 @@
-# TODO(rbharath): Commenting out this file for now. Will be moved to a new repository.
-#import nglview
-#import tempfile
-#import os
-#import mdtraj as md
-#import numpy as np
-#import tempfile
-#from rdkit import Chem
-#from rdkit.Chem import Draw
-#from itertools import islice
-#from IPython.display import Image, HTML, display
-#
-#def combine_mdtraj(protein, ligand):
-#  chain = protein.topology.add_chain()
-#  residue = protein.topology.add_residue("LIG", chain, resSeq=1)
-#  for atom in ligand.topology.atoms:
-#      protein.topology.add_atom(atom.name, atom.element, residue)
-#  protein.xyz = np.hstack([protein.xyz, ligand.xyz])
-#  protein.topology.create_standard_bonds()
-#  return protein
-#
-#def visualize_complex(complex_mdtraj):
-#  ligand_atoms = [a.index for a in complex_mdtraj.topology.atoms if "LIG" in str(a.residue)]
-#  binding_pocket_atoms = md.compute_neighbors(complex_mdtraj, 0.5, ligand_atoms)[0]
-#  binding_pocket_residues = list(set([complex_mdtraj.topology.atom(a).residue.resSeq for a in binding_pocket_atoms]))
-#  binding_pocket_residues = [str(r) for r in binding_pocket_residues]
-#  binding_pocket_residues = " or ".join(binding_pocket_residues)
-#
-#  traj = nglview.MDTrajTrajectory( complex_mdtraj ) # load file from RCSB PDB
-#  ngltraj = nglview.NGLWidget( traj )
-#  ngltraj.representations = [
-#  { "type": "cartoon", "params": {
-#  "sele": "protein", "color": "residueindex"
-#  } },
-#  { "type": "licorice", "params": {
-#  "sele": "(not hydrogen) and (%s)" %  binding_pocket_residues
-#  } },
-#  { "type": "ball+stick", "params": {
-#  "sele": "LIG"
-#  } }
-#  ]
-#  return ngltraj
-#
-#def visualize_ligand(ligand_mdtraj):
-#  traj = nglview.MDTrajTrajectory( ligand_mdtraj ) # load file from RCSB PDB
-#  ngltraj = nglview.NGLWidget( traj )
-#  ngltraj.representations = [
-#    { "type": "ball+stick", "params": {"sele": "all" } } ]
-#  return ngltraj
-#
-#def convert_lines_to_mdtraj(molecule_lines):
-#  tempdir = tempfile.mkdtemp()
-#  molecule_file = os.path.join(tempdir, "molecule.pdb")
-#  with open(molecule_file, "wb") as f:
-#    f.writelines(molecule_lines)
-#  molecule_mdtraj = md.load(molecule_file)
-#  return molecule_mdtraj
-#
-#def display_images(filenames):
-#    """Helper to pretty-print images."""
-#    imagesList=''.join(
-#        ["<img style='width: 140px; margin: 0px; float: left; border: 1px solid black;' src='%s' />"
-#         % str(s) for s in sorted(filenames)])
-#    display(HTML(imagesList))
-#
-#def mols_to_pngs(mols, basename="test"):
-#    """Helper to write RDKit mols to png files."""
-#    filenames = []
-#    for i, mol in enumerate(mols):
-#        filename = "%s%d.png" % (basename, i)
-#        Draw.MolToFile(mol, filename)
-#        filenames.append(filename)
-#    return filenames