lab-cosmo · max-veit · Apr 12, 2021 · Dec 7, 2020 · Dec 11, 2020 · Dec 11, 2020
diff --git a/README.rst b/README.rst
@@ -92,11 +92,11 @@ The following packages are required for building some optional features:
 +==================+=============+====================+
 | Documentation    | pandoc      | (latest)           |
 +------------------+-------------+--------------------+
-|                  | sphinx      | 2.1.2              |
+|                  | sphinx      | 2.1.2 or later     |
 +------------------+-------------+--------------------+
-|                  | breathe     | 4.13.1             |
+|                  | breathe     | 4.14.1 or later    |
 +------------------+-------------+--------------------+
-|                  | nbsphinx    | (latest)           |
+|                  | nbsphinx    | 0.8.1 or later     |
 +------------------+-------------+--------------------+
 
 Compiling

diff --git a/bindings/rascal/models/IP_generic_md.py b/bindings/rascal/models/IP_generic_md.py
@@ -0,0 +1,103 @@
+import ase.io
+import ase.units
+import numpy as np
+
+from ..utils import BaseIO, load_obj
+from ..neighbourlist.structure_manager import AtomsList, unpack_ase
+
+
+class GenericMDCalculator(BaseIO):
+
+    """Generic MD driver for a librascal model
+
+    Initialize with model JSON and a structure template, and calculate
+    energies and forces based on position/cell updates _assuming the
+    order and identity of atoms does not change_.
+    """
+
+    def __init__(self, model_json, structure_template, assume_pbc=True):
+        """Initialize a model and structure template
+
+        Parameters
+        ----------
+        model_json  Filename for a JSON file defining the potential
+        structure_template
+                    Filename for an ASE-compatible Atoms object, used
+                    only to initialize atom types and numbers
+        assume_pbc  Force the PBC to True (to avoid subtle issues that
+                    sometimes arise when no cell is defined), default True
+        """
+        super(GenericMDCalculator, self).__init__()
+        self.model_filename = model_json
+        self.model = load_obj(model_json)
+        self.representation = self.model.get_representation_calculator()
+        self.template_filename = structure_template
+        self.atoms = ase.io.read(structure_template, 0)
+        if assume_pbc:
+            self.atoms.pbc = True
+        self.manager = None
+        self.matrix_indices_in_voigt_notation = [
+            (0, 0),
+            (1, 1),
+            (2, 2),
+            (1, 2),
+            (0, 2),
+            (0, 1),
+        ]
+
+    def calculate(self, positions, cell_matrix):
+        """Calculate energies and forces from position/cell update
+
+        positions   Atomic positions (Nx3 matrix)
+        cell_matrix Unit cell (in ASE format, cell vectors as rows)
+
+        The units of positions and cell are determined by the model JSON
+        file; for now, only Å is supported.  Energies, forces, and
+        stresses are returned in the same units (eV and Å supported).
+
+        Returns a tuple of energy, forces, and stress - forces are
+        returned as an Nx3 array and stresses are returned as a 3x3 array
+
+        Stress convention: The stresses have units eV/Å^3
+        (volume-normalized) and are defined as the gradients of the
+        energy with respect to the cell parameters.
+        """
+        # Update ASE Atoms object (we only use ASE to handle any
+        # re-wrapping of the atoms that needs to take place)
+        self.atoms.set_cell(cell_matrix)
+        self.atoms.set_positions(positions)
+
+        # Convert from ASE to librascal
+        if self.manager is None:
+            #  happens at the begining of the MD run
+            at = self.atoms.copy()
+            at.wrap(eps=1e-11)
+            self.manager = [at]
+        elif isinstance(self.manager, AtomsList):
+            structure = unpack_ase(self.atoms, wrap_pos=True)
+            structure.pop("center_atoms_mask")
+            self.manager[0].update(**structure)
+
+        # Compute representations and evaluate model
+        self.manager = self.representation.transform(self.manager)
+        energy = self.model.predict(self.manager)
+        forces = self.model.predict_forces(self.manager)
+        stress_voigt = self.model.predict_stress(self.manager)
+        stress_matrix = np.zeros((3, 3))
+        stress_matrix[tuple(zip(*self.matrix_indices_in_voigt_notation))] = stress_voigt
+        # Symmetrize the stress matrix (replicate upper-diagonal entries)
+        stress_matrix += np.triu(stress_matrix).T
+        return energy, forces, stress_matrix
+
+    def _get_init_params(self):
+        init_params = dict(
+            model_json=self.model_filename, structure_template=self.template_filename
+        )
+        return init_params
+
+    def _set_data(self, data):
+        self.manager = None
+        super()._set_data(data)
+
+    def _get_data(self):
+        return super()._get_data()
diff --git a/bindings/rascal/models/__init__.py b/bindings/rascal/models/__init__.py
@@ -1,2 +1,2 @@
-from .krr import train_gap_model, KRR
+from .krr import train_gap_model, KRR, compute_KNM
 from .kernels import Kernel
diff --git a/bindings/rascal/models/krr.py b/bindings/rascal/models/krr.py
@@ -1,9 +1,17 @@
-from ..utils import BaseIO
+from ..utils import BaseIO, is_notebook
 from ..lib import compute_sparse_kernel_gradients, compute_sparse_kernel_neg_stress
 
 import numpy as np
 import ase
 
+try:
+    if is_notebook():
+        from tqdm.notebook import tqdm
+    else:
+        from tqdm import tqdm
+except ImportError:
+    from ..utils.misc import tqdm_nop as tqdm
+
 
 class KRR(BaseIO):
     """Kernel Ridge Regression model. Only supports sparse GPR
@@ -23,15 +31,38 @@ class KRR(BaseIO):
     self_contributions : dictionary
         map atomic number to the property baseline, e.g. isolated atoms
         energies when the model has been trained on total energies.
+
+    description : string
+        User-defined string used to describe the model for future reference
+
+    units : dict
+        Energy and length units used by the model (default: eV and Å (aka AA),
+        same as used in ASE)
     """
 
-    def __init__(self, weights, kernel, X_train, self_contributions):
+    def __init__(
+        self,
+        weights,
+        kernel,
+        X_train,
+        self_contributions,
+        description="KRR potential model",
+        units=None,
+    ):
         # Weights of the krr model
         self.weights = weights
         self.kernel = kernel
         self.X_train = X_train
         self.self_contributions = self_contributions
         self.target_type = kernel.target_type
+        self.description = description
+        if units is None:
+            units = dict()
+        if "energy" not in units:
+            units["energy"] = "eV"
+        if "length" not in units:
+            units["length"] = "AA"
+        self.units = units
 
     def _get_property_baseline(self, managers):
         """build total baseline contribution for each prediction"""
@@ -186,6 +217,8 @@ def _get_init_params(self):
             kernel=self.kernel,
             X_train=self.X_train,
             self_contributions=self.self_contributions,
+            description=self.description,
+            units=self.units.copy(),
         )
         return init_params
 
@@ -199,11 +232,78 @@ def get_representation_calculator(self):
         return self.kernel._rep
 
 
+# TODO(max, felix) I think this belongs in utils; it's not KRR-specific
+def get_grad_strides(frames):
+    """Get strides for total-energy/gradient kernels of the given structures
+
+    Parameters:
+        frames  List of structures each indicating the number of atoms
+
+    Returns: (1) the number of structures, (2) the number of gradient entries
+    (== 3 * the total number of atoms), and (3) strides for assigning the
+    gradient entries for each structure
+    """
+    Nstructures = len(frames)
+    Ngrad_stride = [0]
+    Ngrads = 0
+    for frame in frames:
+        n_at = len(frame)
+        Ngrad_stride.append(n_at * 3)
+        Ngrads += n_at * 3
+    Ngrad_stride = np.cumsum(Ngrad_stride) + Nstructures
+    return Nstructures, Ngrads, Ngrad_stride
+
+
+def compute_kernel_single(i_frame, frame, representation, X_sparse, kernel):
+    """Compute kernel of the (new) structure against the sparse points
+
+    Parameters:
+        i_frame     frame index (ignored???)
+        frame       New structure to compute kernel for
+        representation
+                    RepresentationCalculator to use for the structures
+        X_sparse    Sparse points to compute kernels against
+        kernel      Kernel object to use
+
+    Return both the kernel and the gradient of the kernel
+    """
+    feat = representation.transform([frame])
+    en_row = kernel(feat, X_sparse)
+    grad_rows = kernel(feat, X_sparse, grad=(True, False))
+    return en_row, grad_rows
+
+
+def compute_KNM(frames, X_sparse, kernel, soap):
+    """Compute kernel of the (new) structure against the sparse points
+
+    Parameters:
+        frame       New structure to compute kernel for
+        representation
+                    RepresentationCalculator to use for the structures
+        X_sparse    Sparse points to compute kernels against
+        kernel      Kernel object to use
+
+    Return the kernel stacked with the gradient of the kernel
+    """
+    Nstructures, Ngrads, Ngrad_stride = get_grad_strides(frames)
+    KNM = np.zeros((Nstructures + Ngrads, X_sparse.size()))
+    pbar = tqdm(frames, desc="compute KNM", leave=False)
+    for i_frame, frame in enumerate(frames):
+        en_row, grad_rows = compute_kernel_single(
+            i_frame, frame, soap, X_sparse, kernel
+        )
+        KNM[Ngrad_stride[i_frame] : Ngrad_stride[i_frame + 1]] = grad_rows
+        KNM[i_frame] = en_row
+        pbar.update()
+    pbar.close()
+    return KNM
+
+
 def train_gap_model(
     kernel,
-    managers,
+    frames,
     KNM_,
-    X_pseudo,
+    X_sparse,
     y_train,
     self_contributions,
     grad_train=None,
@@ -253,15 +353,15 @@ def train_gap_model(
     kernel : Kernel
         SparseKernel to compute KMM and initialize the model. It was used to
         build KNM_.
-    managers : AtomsList
-        Training structures with features (and gradients)
+    frames : list(ase.Atoms)
+        Training structures
     KNM_ : np.array
         kernel matrix to use in the training, typically computed with:
-        KNM = kernel(managers, X_pseudo)
-        KNM_down = kernel(managers, X_pseudo, grad=(True, False))
+        KNM = kernel(managers, X_sparse)
+        KNM_down = kernel(managers, X_sparse, grad=(True, False))
         KNM = np.vstack([KNM, KNM_down])
         when training with derivatives.
-    X_pseudo : SparsePoints
+    X_sparse : SparsePoints
         basis samples to use in the model's interpolation
     y_train : np.array
         reference property
@@ -291,16 +391,16 @@ def train_gap_model(
         In Handbook of Materials Modeling (pp. 1–27). Springer, Cham.
         https://doi.org/10.1007/978-3-319-42913-7_68-1
     """
-    KMM = kernel(X_pseudo)
+    KMM = kernel(X_sparse)
     Y = y_train.reshape((-1, 1)).copy()
     KNM = KNM_.copy()
     n_centers = Y.shape[0]
     Natoms = np.zeros(n_centers)
     Y0 = np.zeros((n_centers, 1))
-    for iframe, manager in enumerate(managers):
-        Natoms[iframe] = len(manager)
-        for center in manager:
-            Y0[iframe] += self_contributions[center.atom_type]
+    for iframe, frame in enumerate(frames):
+        Natoms[iframe] = len(frame)
+        for sp in frame.get_atomic_numbers():
+            Y0[iframe] += self_contributions[sp]
     Y = Y - Y0
     delta = np.std(Y)
     # lambdas[0] is provided per atom hence the '* np.sqrt(Natoms)'
@@ -320,7 +420,7 @@ def train_gap_model(
     K = KMM + np.dot(KNM.T, KNM)
     Y = np.dot(KNM.T, Y)
     weights = np.linalg.lstsq(K, Y, rcond=None)[0]
-    model = KRR(weights, kernel, X_pseudo, self_contributions)
+    model = KRR(weights, kernel, X_sparse, self_contributions)
 
     # avoid memory clogging
     del K, KMM

diff --git a/bindings/rascal/utils/misc.py b/bindings/rascal/utils/misc.py
@@ -1,5 +1,9 @@
 """Miscellaneous useful utilities"""
 
+import logging
+
+LOGGER = logging.getLogger(__name__)
+
 
 def is_notebook():
     """Is this being run inside an IPython Notebook?"""
@@ -15,3 +19,21 @@ def is_notebook():
             return False  # Other type (?)
     except NameError:
         return False  # Probably standard Python interpreter
+
+
+class tqdm_nop:
+    """A simple no-op class to replace tqdm if it is not available"""
+
+    def __init__(self, iterable, **kwargs):
+        LOGGER.warn("tqdm not available")
+        LOGGER.warn("(tried to call tqdm with args: {:s})".format(str(kwargs)))
+        self.iterable = iterable
+
+    def __iter__(self):
+        return iter(self.iterable)
+
+    def update(self):
+        pass
+
+    def close(self):
+        pass
diff --git a/docs/source/bibliography.rst b/docs/source/bibliography.rst
@@ -1,7 +1,7 @@
 .. _bibliography:
 
-References
-==========
+Bibliography
+============
 
 .. [AllenTildesley] D. J. Tildesley and M.P. Allen, *Computer Simulations of Liquids*, Oxford University Press
 .. [Behler2007] J. Behler and M. Parinello, *Generalized Neural-Network Representations of High-Dimensional Potential-Energy Surfaces*, Phys. Rev. Lett 98, 146401

diff --git a/docs/source/dev_guide/developer.rst b/docs/source/dev_guide/developer.rst
@@ -13,6 +13,7 @@ We strongly advise our contributors to check these guidlines before submitting t
    how_to_misc
    how_to_add_representation
    how_to_test
+   neighbour_list/neighbour-list-manager
    forwarding_of_property_requests
    coding-convention
    review_process

diff --git a/docs/source/neighbour_list/adaptors.rst → ...rce/dev_guide/neighbour_list/adaptors.rst b/docs/source/neighbour_list/adaptors.rst → ...rce/dev_guide/neighbour_list/adaptors.rst
diff --git a/...neighbour_list/neighbour-list-manager.rst → ...neighbour_list/neighbour-list-manager.rst b/...neighbour_list/neighbour-list-manager.rst → ...neighbour_list/neighbour-list-manager.rst
@@ -1,7 +1,7 @@
 .. _neighbour-list-manager:
 
 Neighbour List Manager
-=====================
+======================
 
 Librascal provides a user friendly, flexible and efficient infrastructure to build neighbour list meeting the specific needs of many represenations of atomic neighbourhood.
 A neighbour list in librascal manages and/or builds all the data relevant to the atomic structure it refers to, e.g. the atomic structure (positions, atomic types, cell, periodic boundary conditions), the distances between a central atom and its neighbours, the representations associated to the atomic structure...

diff --git a/...source/neighbour_list/species-manager.rst → ..._guide/neighbour_list/species-manager.rst b/...source/neighbour_list/species-manager.rst → ..._guide/neighbour_list/species-manager.rst
diff --git a/docs/source/examples/examples.rst b/docs/source/examples/examples.rst
@@ -0,0 +1,12 @@
+Examples
+========
+
+This section contains self-contained examples of using `librascal` for various
+atomistic modelling tasks.  The examples are structured in the form of Jupyter
+notebooks, rendered for viewing here and available for interactive execution in
+the top-level :file:`examples/` directory.
+
+.. toctree::
+   :maxdepth: 1
+
+   zundel_IP.ipynb
diff --git a/docs/source/examples/zundel_IP.ipynb b/docs/source/examples/zundel_IP.ipynb
@@ -0,0 +1 @@
+../../../examples/iPi/zundel/zundel_IP.ipynb