mir-group · kylebystrom · Dec 4, 2024 · Nov 11, 2024 · Nov 11, 2024 · Nov 11, 2024
diff --git a/ciderpress/data/__init__.py b/ciderpress/data/__init__.py
@@ -1,3 +1,23 @@
+#!/usr/bin/env python
+# CiderPress: Machine-learning based density functional theory calculations
+# Copyright (C) 2024 The President and Fellows of Harvard College
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>
+#
+# Author: Kyle Bystrom <[email protected]>
+#
+
 import os
 
 import numpy as np

diff --git a/ciderpress/dft/density_util.py b/ciderpress/dft/density_util.py
@@ -1,3 +1,23 @@
+#!/usr/bin/env python
+# CiderPress: Machine-learning based density functional theory calculations
+# Copyright (C) 2024 The President and Fellows of Harvard College
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>
+#
+# Author: Kyle Bystrom <[email protected]>
+#
+
 import numpy as np
 
 

diff --git a/ciderpress/dft/lcao_convolutions.py b/ciderpress/dft/lcao_convolutions.py
@@ -102,7 +102,6 @@ def get_etb_from_expnt_range(
     ns = nmaxs - nmins + 1
     etb = []
     for l, n in enumerate(np.ceil(ns).astype(int)):
-        # print(l, n, emin_by_l[l], emax_by_l[l], beta)
         if n > 0 and l <= lmax:
             etb.append((l, n, emin_by_l[l], beta))
     return etb

diff --git a/ciderpress/dft/lcao_interpolation.py b/ciderpress/dft/lcao_interpolation.py
@@ -21,6 +21,7 @@
 import ctypes
 
 import numpy as np
+from pyscf import lib as pyscflib
 
 from ciderpress import lib
 from ciderpress.dft.lcao_convolutions import (
@@ -375,6 +376,14 @@ def natm(self):
         return self.atco.natm
 
     def _set_num_ai(self, all_coords):
+        """
+        This function computes the number of coordinates in
+        all_coords that fall in each spline "box" for the radial
+        interpolation grid on each atom. The result is then
+        used to create the _loc_ai member, which in turn
+        is used by the _compute_spline_ind_order function
+        to order grid coordinates by their spline box on a given atom.
+        """
         if all_coords is None:
             assert self.is_num_ai_setup
             return
@@ -424,6 +433,12 @@ def _set_num_ai(self, all_coords):
         self.is_num_ai_setup = True
 
     def _compute_spline_ind_order(self, a):
+        """
+        Assuming _set_num_ai has been called previously, order
+        self.all_coords such that each coordinate is in increasing
+        order of spline index for the radial interpolation grid
+        on atom a.
+        """
         if not self.is_num_ai_setup:
             raise RuntimeError
         ngrids_tot = self.all_coords.shape[0]
@@ -451,6 +466,12 @@ def _compute_spline_ind_order(self, a):
         return self._coords_ord, self._ind_ord_fwd
 
     def _eval_spline_bas_single(self, a):
+        """
+        Note that _set_num_ai must have been called previously, because
+        it is required for _compute_spline_ind_order to work, which
+        is called by this function. TODO might be better to have a cleaner
+        solution for this algorithm.
+        """
         self._compute_spline_ind_order(a)
         ngrids = self._coords_ord.shape[0]
         if self.onsite_direct:
@@ -585,7 +606,7 @@ def _contract_grad_terms(self, excsum, f_g, a, v):
         assert iatom_list is not None
         assert iatom_list.flags.c_contiguous
         ngrids = iatom_list.size
-        libcider.contract_grad_terms2(
+        libcider.contract_grad_terms_parallel(
             excsum.ctypes.data_as(ctypes.c_void_p),
             f_g.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_int(self.atco.natm),
@@ -637,8 +658,7 @@ def _interpolate_nopar_atom_deriv(self, f_arlpq, f_gq):
                     args[3] = auxo_vgp[0].ctypes.data_as(ctypes.c_void_p)
                     fn(*args)
                     self._call_l1_fill(ftmp_gq, self.atom_coords[a], True)
-                    # TODO accelerate since this step will be done many times
-                    ftmp = np.einsum("gq,gq->g", ftmp_gq, f_gq)
+                    ftmp = pyscflib.einsum("gq,gq->g", ftmp_gq, f_gq)
                     self._contract_grad_terms(excsum, ftmp, a, v)
         return excsum
 
@@ -695,7 +715,6 @@ def project_orb2grid(self, f_uq, f_gq=None):
         Args:
             f_uq:
             f_gq:
-            spline_buf: Must be all zeros (TODO this is unsafe)
 
         Returns:
 
@@ -834,7 +853,6 @@ def project_orb2grid(self, f_uq, f_gq=None):
         Args:
             f_uq:
             f_gq:
-            spline_buf: Must be all zeros (TODO this is unsafe)
 
         Returns:
 

diff --git a/ciderpress/dft/model_utils.py b/ciderpress/dft/model_utils.py
@@ -1,3 +1,23 @@
+#!/usr/bin/env python
+# CiderPress: Machine-learning based density functional theory calculations
+# Copyright (C) 2024 The President and Fellows of Harvard College
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>
+#
+# Author: Kyle Bystrom <[email protected]>
+#
+
 import joblib
 import yaml
 

diff --git a/ciderpress/dft/plans.py b/ciderpress/dft/plans.py
@@ -2055,7 +2055,6 @@ def get_a2q_fast(self, exp_g):
         """A fast (parallel C-backend) version of get_a2q that
         also has a local option to get the q values just
         for the local alphas."""
-        # TODO add bounds checking
         di = np.empty_like(exp_g)
         derivi = np.empty_like(exp_g)
         if self.alpha_formula == "etb":

diff --git a/ciderpress/dft/pwutil.py b/ciderpress/dft/pwutil.py
@@ -1,3 +1,23 @@
+#!/usr/bin/env python
+# CiderPress: Machine-learning based density functional theory calculations
+# Copyright (C) 2024 The President and Fellows of Harvard College
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>
+#
+# Author: Kyle Bystrom <[email protected]>
+#
+
 import ctypes
 
 import numpy as np

diff --git a/ciderpress/dft/settings.py b/ciderpress/dft/settings.py
@@ -72,6 +72,9 @@ def _get_fl_ueg(s):
 def get_cider_exponent(
     rho, sigma, tau, a0=1.0, grad_mul=0.0, tau_mul=0.03125, rhocut=1e-10, nspin=1
 ):
+    """
+    Evaluate an NLDF length-scale exponent at the MGGA level.
+    """
     if nspin > 2:
         raise ValueError
     if isinstance(rho, np.ndarray):
@@ -113,6 +116,9 @@ def get_cider_exponent(
 
 
 def get_cider_exponent_gga(rho, sigma, a0=1.0, grad_mul=0.03125, rhocut=1e-10, nspin=1):
+    """
+    Evaluate an NLDF length-scale exponent at the GGA level.
+    """
     if nspin > 2:
         raise ValueError
     if isinstance(rho, np.ndarray):
@@ -161,6 +167,13 @@ def _get_ueg_expnt(aval, tval, rho):
 
 
 class BaseSettings(ABC):
+    """
+    This is a base class for storing the settings for different
+    types of density/density matrix features in CiderPress.
+    Settings objects indicate which features must be evaluated,
+    and with which hyperparameters, to use as input to an ML functional.
+    """
+
     @property
     @abstractmethod
     def nfeat(self):
@@ -171,6 +184,9 @@ def nfeat(self):
 
     @property
     def is_empty(self):
+        """
+        Return True if this settings object specifies zero features.
+        """
         return self.nfeat == 0
 
     @abstractmethod
@@ -201,6 +217,14 @@ def get_reasonable_normalizer(self):
 
 
 class EmptySettings(BaseSettings):
+    """
+    The EmptySettings class is a representation of a feature set containing
+    zero features. It is used when a certain type of feature is not
+    present in a model. (For example, if a model does not use SDMX
+    features, that model's FeatureSettings.sdmx_settings will be
+    an EmptySettings instance.)
+    """
+
     @property
     def nfeat(self):
         return 0
@@ -509,8 +533,6 @@ def __init__(self, pows):
                 and features might be poorly defined/numerically inaccurate.
         """
         super(SDMXSettings, self).__init__("smooth")
-        # if mode not in ['diffse', 'r2', 'r4']:
-        #    raise ValueError('Mode must be exact or smooth, got {}'.format(mode))
         self.pows = pows
 
     @property
@@ -948,14 +970,26 @@ def get_reasonable_normalizer(self):
 See ``ALLOWED_J_SPECS``
 """
 
-ALLOWED_RHO_MULTS = ["one", "taumix", "dampmix", "expnt"]
+ALLOWED_RHO_MULTS = ["one", "expnt"]
 """
-TODO docs here
+These strings specify the allowed options for what value
+to multiply the density by before integrating it to construct
+NLDF features. The options are:
+
+one: Identity, i.e. multiply density by 1
+
+expnt: Multiply the density by the NLDF exponent specified
+by the theta_params. (NOTE: Experimental, not thoroughly tested.)
 """
 
-ALLOWED_RHO_DAMPS = ["none", "exponential", "asymptotic_const"]
+ALLOWED_RHO_DAMPS = ["exponential"]
 """
-TODO docs here
+These strings specify the allowed options for how to "damp"
+the density for the version k features. Currently the only allowed
+option is "exponential", which results in the integral
+:math:`\\int \\mathrm{d}^3 r' \\, g[n](|r-r'|) n(r') \\exp(-3 a_0[n](r') / 2 a_i[n](r))`,
+where :math:`a_0` is the exponent given by ``theta_params``
+and :math:`a_i` is an exponent given by ``feat_params``.
 """
 
 SPEC_USPS = {
@@ -980,8 +1014,6 @@ def get_reasonable_normalizer(self):
 """
 RHO_MULT_USPS = {
     "one": 0,
-    "taumix": 0,
-    "dampmix": 0,
     "expnt": 2,
 }
 
@@ -1016,7 +1048,7 @@ def __init__(
                 Should be an array of 3 floats [a0, grad_mul, tau_mul].
                 tau_mul is ignored if sl_level="GGA" and may therefore be excluded.
             rho_mult (str): Multiply the density that gets integrated
-                by a prefactor. Options: None/'one', 'taumix', 'dampmix', 'expnt'
+                by a prefactor. Options: See ALLOWED_RHO_MULTS.
         """
         self._sl_level = sl_level
         self.theta_params = theta_params
@@ -1130,7 +1162,7 @@ def __init__(
                 'exponent' is not used in the squared-exponential but rather
                 within the density damping scheme (see rho_damp).
             rho_mult (str): Multiply the density that gets integrated
-                by a prefactor. Options: None/'one', 'taumix', 'dampmix', 'expnt'
+                by a prefactor. Options: See ALLOWED_RHO_MULTS.
             l0_feat_specs (list of str): Each item in the list is a str
                 specifying the formula to be used for the scalar (l=0)
                 features. See ALLOWED_I_SPECS_L0 for allowed values.
@@ -1264,7 +1296,7 @@ def __init__(
                 'exponent' is not used in the squared-exponential but rather
                 within the density damping scheme (see rho_damp).
             rho_mult (str): Multiply the density that gets integrated
-                by a prefactor. Options: None/'one', 'taumix', 'dampmix', 'expnt'
+                by a prefactor. Options: See ALLOWED_RHO_MULTS.
             feat_specs (list of str):
                 Each item in the list is a string specifying the formula
                 to be used for a feature (see ALLOWED_J_SPECS for options).
@@ -1392,7 +1424,7 @@ def __init__(
                 'exponent' is not used in the squared-exponential but rather
                 within the density damping scheme (see rho_damp).
             rho_mult (str): Multiply the density that gets integrated
-                by a prefactor. Options: None/'one', 'taumix', 'dampmix', 'expnt'
+                by a prefactor. Options: See ALLOWED_RHO_MULTS.
             l0_feat_specs_i (list of str): Each item in the list is a str
                 specifying the formula to be used for the scalar (l=0)
                 features. See ALLOWED_I_SPECS_L0 for allowed values.
@@ -1504,7 +1536,7 @@ def __init__(
                 'exponent' is not used in the squared-exponential but rather
                 within the density damping scheme (see rho_damp).
             rho_mult (str): Multiply the density that gets integrated
-                by a prefactor. Options: None/'one', 'taumix', 'dampmix', 'expnt'
+                by a prefactor. Options: See ALLOWED_RHO_MULTS.
             feat_params (list of np.ndarray):
                 Each item in the list is an array with the parameters for the
                 feature corresponding to the feat_specs above. Typically, each array
@@ -1522,6 +1554,8 @@ def __init__(
         for s, p in zip(self.feat_specs, self.feat_params):
             self._check_params(p, spec=s)
         self.rho_damp = rho_damp
+        if self.rho_damp not in ALLOWED_RHO_DAMPS:
+            raise ValueError("rho_damp argument must be in ALLOWED_RHO_DAMPS.")
 
     @property
     def num_feat_param_sets(self):