Skip to content

Commit

Permalink
add reactive site and polish the doc for molecule modules
Browse files Browse the repository at this point in the history
  • Loading branch information
qzhu2017 committed Oct 3, 2024
1 parent 755d00c commit d3ccc85
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 48 deletions.
118 changes: 92 additions & 26 deletions pyxtal/molecular_crystal.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def __init__(
compat, self.degrees = self.group.check_compatible(self.numMols, self.valid_orientations)

if not compat:
msg = f"Compoisition {self.numMols} not compatible with symmetry {self.group.number}"
msg = f"Inincompatible compoisition {self.numMols} with symmetry {self.group.number}"
raise Comp_CompatibilityError(msg)

self.set_volume()
Expand All @@ -151,13 +151,24 @@ def __repr__(self):

def set_sites(self, sites):
"""
initialize Wyckoff sites
Initialize and store symmetry sites for each molecule.
This function processes a list of symmetry sites, validates them against the expected
number of molecules, and stores them in the `self.sites` dictionary. Each entry in the
`sites` list can be either a dictionary or another type (list, etc.), and if no valid
site is provided for a molecule, `None` is assigned for that molecule's site.
Args:
sites: list
sites (list): A list of sites corresponding to `self.molecules`. They can be:
- A dictionary of site information (keys represent Wyckoff letters or
other identifiers, and values are the corresponding information).
- A list or other type representing site information.
- None, if no symmetry site information is available for that molecule.
"""
# Symmetry sites
# Initialize the self.sites dictionary to store site information
self.sites = {}

# Iterate over the molecules and their corresponding sites
for i, _mol in enumerate(self.molecules):
if sites is not None and sites[i] is not None and len(sites[i]) > 0:
self._check_consistency(sites[i], self.numMols[i])
Expand All @@ -172,22 +183,40 @@ def set_sites(self, sites):

def set_molecules(self, molecules, torsions):
"""
Get molecular information
Initialize and store molecular information.
This function processes a list of molecules and initializes each one
as a `pyxtal_molecule` object. If torsions are provided, they are
applied to the respective molecules during initialization.
It stored information in the `self.molecules` attribute.
Args:
molecules: list of molecules
torsions: list of torsions
molecules (list): A list of molecules, where each entry can either be:
- A SMILES string or file path representing a molecule.
- An already-initialized `pyxtal_molecule` object.
torsions (list): A list of torsion angles. The length of `torsions`
must be equal to the length of `molecules`, or None
can be provided if no torsions are needed.
"""

# If no torsions are provided, initialize with None for each molecule
if torsions is None:
torsions = [None] * len(molecules)

# Initialize the molecules list to store processed pyxtal_molecule objects
self.molecules = []

# Iterate over the molecules
for i, mol in enumerate(molecules):
# already a pyxtal_molecule object
if isinstance(mol, pyxtal_molecule):
p_mol = mol
else:
p_mol = pyxtal_molecule(mol, seed=self.seed, torsions=torsions[i], tm=self.tol_matrix, random_state=self.random_state)
p_mol = pyxtal_molecule(mol,
seed=self.seed,
torsions=torsions[i],
tm=self.tol_matrix,
random_state=self.random_state)
self.molecules.append(p_mol)

def set_orientations(self):
Expand Down Expand Up @@ -427,56 +456,93 @@ def _set_mol_wyckoffs(self, id, numMol, pyxtal_mol, valid_ori, mol_wyks):

def _set_orientation(self, pyxtal_mol, pt, oris, wp):
"""
Generate good orientations
Generate valid orientations for a given molecule in a Wyckoff position.
It tries to generate valid orientations for the molecule by:
- Selecting a random orientation from a list of possible orientations.
- Flipping the orientation to test different alignments.
- Checking the smallest distance between atoms and ensuring it's valid.
- Using the bisection method is refine the orientation.
Args:
pyxtal_mol: The pyxtal_molecule object representing the molecule.
pt: Position of the molecule.
oris: List of potential orientations.
wp: Wyckoff position object representing the symmetry of the site.
Returns:
ms0: A valid `mol_site` object if an acceptable orientation is found.
returns `None` if no valid orientation is found within the attempts.
"""
# Use a Wyckoff_site object for the current site

# Increment the number of attempts to generate a valid orientation
self.numattempts += 1

# NOTE removing this copy causes tests to fail -> state not managed well
ori = self.random_state.choice(oris).copy()
ori.change_orientation(flip=True)

# Create a mol_site object with the current orientation
ms0 = mol_site(pyxtal_mol, pt, ori, wp, self.lattice)
# Check distances within the WP

# Check if the current orientation results in valid distances
if ms0.short_dist():
return ms0
else:
# Maximize the smallest distance for the general
# positions if needed
# Maximize the separation if needed
if len(pyxtal_mol.mol) > 1 and ori.degrees > 0:
# bisection method
# Define the distance function for bisection method
def fun_dist(angle, ori, mo, pt):
# ori0 = ori.copy()
ori.change_orientation(angle)
ms0 = mol_site(
mo,
pt,
ori,
wp,
self.lattice,
)
ms0 = mol_site(mo, pt, ori, wp, self.lattice)
return ms0.get_min_dist()

# Set initial bounds for the angle
angle_lo = ori.angle
angle_hi = angle_lo + np.pi
fun_lo = fun_dist(angle_lo, ori, pyxtal_mol, pt)
fun_hi = fun_dist(angle_hi, ori, pyxtal_mol, pt)
fun = fun_hi

fun = fun_hi # Set the initial value for the function

# Refine the orientation using a bisection method
for _it in range(self.ori_attempts):
self.numattempts += 1

# Return as soon as a good orientation is found
if (fun > 0.8) & (ms0.short_dist()):
return ms0

# Compute the midpoint angle for bisection
angle = (angle_lo + angle_hi) / 2
fun = fun_dist(angle, ori, pyxtal_mol, pt)
# print('Bisection: ', it, fun)

# Update based on the function value at the midpoint
if fun_lo > fun_hi:
angle_hi, fun_hi = angle, fun
else:
angle_lo, fun_lo = angle, fun

return None

def _check_consistency(self, site, numMol):
"""
Check if the composition is consistent with symmetry
Check if N_mol is consistent with the symmetry constraints of the system.
It verifies if the sum of molecules from the WP matches (`numMol`).
Each Wyckoff site string in the `site` list includes a number that
represents how many molecules are associated with that site.
If a inconsistency is found, it raises a ValueError with a detailed message.
Args:
site (list of str): A list of strings for Wyckoff sites. Each string
contains a number (e.g., "3a", "4b") where the number
indicates how many molecules are at that site.
numMol (int): The total number of molecules expected in the structure.
Returns:
bool: Returns `True` if the number of molecules matches `numMol`.
Raises a ValueError if they do not match.
"""
num = 0
for s in site:
Expand Down
86 changes: 64 additions & 22 deletions pyxtal/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,37 @@
molecule_collection = Collection("molecules")


# single_smiles = [
# "Cl-", "F-", "Br-", "I-", "Li+", "Na+", "Cs+", "Rb+",
# "[Cl-]", "[F-]", "[Br-]", "[I-]", "[Li+]", "[Na+]", "[Cs+]", "Rb+",
# ]
def find_rotor_from_smile(smile):
"""
Find the positions of rotatable bonds in the molecule.
Find the positions of rotatable bonds based on a SMILES string.
Rotatable bonds are those which are not part of rings and which
fit specific chemical patterns. These torsions are filtered by
rules such as avoiding atoms with only one neighbor and avoiding
equivalent torsions.
Args:
smile (str): The SMILES string representing the molecule.
Returns:
list of tuples: Each tuple represents a torsion as (i, j, k, l)
where i-j-k-l are atom indices involved in the rotatable bond.
"""

def cleaner(list_to_clean, neighbors):
"""
Remove duplicate torsion from a list of atom index tuples.
Remove duplicate and invalid torsions from a list of atom index tuples.
Filters torsions based on the neighbors count for the atoms involved in the torsion.
This avoids torsions that involve terminal atoms and duplicates.
Args:
list_to_clean (list of tuples): List of torsions (i, j, k, l)
neighbors (list of int): List of neighbors for each atom in the molecule.
Returns:
list of tuples: Cleaned list of torsions.
"""

for_remove = []
Expand All @@ -54,9 +73,12 @@ def cleaner(list_to_clean, neighbors):
# for i-j-k-l, we don't want i, l are the ending members
# C-C-S=O is not a good choice since O is only 1-coordinated
# C-C-NO2 is a good choice since O is only 1-coordinated

# Remove torsions that involve terminal atoms with only one neighbor
if neighbors[ix0] == 1 and neighbors[ix1] == 2 or neighbors[ix3] == 1 and neighbors[ix2] == 2:
for_remove.append(x)
else:
# Remove duplicate torsions that are equivalent
for y in reversed(range(x)):
ix1 = itemgetter(1)(list_to_clean[x])
ix2 = itemgetter(2)(list_to_clean[x])
Expand All @@ -75,6 +97,7 @@ def cleaner(list_to_clean, neighbors):
else:
from rdkit import Chem

# SMARTS patterns to identify rotatable bonds and double bonds
smarts_torsion1 = "[*]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[*]"
smarts_torsion2 = "[*]~[^2]=[^2]~[*]" # C=C bonds
# smarts_torsion2="[*]~[^1]#[^1]~[*]" # C-C triples bonds, to be fixed
Expand All @@ -89,7 +112,11 @@ def cleaner(list_to_clean, neighbors):
torsion1 = cleaner(list(mol.GetSubstructMatches(patn_tor1)), neighbors)
patn_tor2 = Chem.MolFromSmarts(smarts_torsion2)
torsion2 = cleaner(list(mol.GetSubstructMatches(patn_tor2)), neighbors)

# Combine and clean torsions
tmp = cleaner(torsion1 + torsion2, neighbors)

# Exclude torsions that are part of rings
torsions = []
for t in tmp:
(i, j, k, l) = t
Expand All @@ -103,13 +130,13 @@ def cleaner(list_to_clean, neighbors):
def has_non_aromatic_ring(smiles):
"""
Determine if a molecule has a non-aromatic ring.
Mainly used to check if a cyclic ring exists.
It checks if a cyclic ring system exists that is not aromatic.
Args:
smiles: smiles string
smiles (str): A SMILES string representing the molecule.
Returns:
True or False
bool: True if it contains a non-aromatic ring, False otherwise.
"""
from rdkit import Chem

Expand Down Expand Up @@ -201,24 +228,33 @@ def get_conformers(smile, seed):

class pyxtal_molecule:
"""
Extended molecule class based on pymatgen.core.structure.Molecule
The added features include:
0, parse the input
1, estimate volume/tolerance/radii
2, find and store symmetry
3, get the principle axis
4, re-align the molecule
The molecule is always centered at (0, 0, 0).
If the smile format is used, the center is defined as in
A molecule class to support the descriptin of molecules in a xtal
Features:
0. Parse the input from different formats (SMILES, xyz, gjf, etc.).
1. Estimate molecular properties such as volume, tolerance, and radii.
2. Find and store symmetry information of the molecule.
3. Get the principal axis of the molecule.
4. Re-align the molecule to center it at (0, 0, 0).
SMILES Format:
If a SMILES format is used, the molecular center is defined following
RDKit's handling of molecular transformations:
https://www.rdkit.org/docs/source/rdkit.Chem.rdMolTransforms.html
Otherwise, the center is just the mean of atomic positions
Args:
mol: a string to reprent the molecule
tm: tolerance matrix
mol (str or pymatgen.Molecule): The molecule representation, either as a string
(SMILES or filename) or as a pymatgen `Molecule` object.
tm (Tol_matrix, optional): A tolerance matrix object, used for molecular tolerances.
symmetrize (bool, optional): Whether to symmetrize the molecule using its point group.
fix (bool, optional): Fix torsions in the molecule.
torsions (list, optional): List of torsions to analyze or fix.
seed (int, optional): Random seed for internal processes. Defaults to a hex seed.
random_state (int or numpy.Generator, optional): Numpy random state for random number generation.
symtol (float, optional): Symmetry tolerance. Default is 0.3.
active_sites (list, optional): List of active sites within the molecule.
"""

def list_molecules():
Expand All @@ -237,7 +273,9 @@ def __init__(
random_state=None,
tm=Tol_matrix(prototype="molecular"),
symtol=0.3,
active_sites=None,
):

mo = None
self.smile = None
self.torsionlist = [] #None
Expand All @@ -246,6 +284,9 @@ def __init__(
seed = 0xF00D
self.seed = seed

# Active sites is a two list of tuples [(donors), (acceptors)]
self.active_sites = active_sites

if isinstance(random_state, Generator):
self.random_state = random_state.spawn(1)[0]
else:
Expand Down Expand Up @@ -297,6 +338,7 @@ def __init__(
self.mol = mo
self.get_symmetry()

# Additional molecular properties
self.tm = tm
self.box = self.get_box()
self.volume = self.box.volume
Expand Down

0 comments on commit d3ccc85

Please sign in to comment.