diff --git a/bindings/rascal/models/krr.py b/bindings/rascal/models/krr.py index d85e32704..e9fccc3fa 100644 --- a/bindings/rascal/models/krr.py +++ b/bindings/rascal/models/krr.py @@ -4,23 +4,15 @@ Kernel Kernel Ridge Regression model (sparse GPR only). Public functions: - compute_kernel_single Compute GAP kernel of a single structure compute_KNM Compute GAP kernel of a set of structures + train_gap_model Train a GAP model given a kernel matrix and sparse points """ -from ..utils import BaseIO, is_notebook +from ..utils import BaseIO from ..lib import compute_sparse_kernel_gradients, compute_sparse_kernel_neg_stress import numpy as np import ase -try: - if is_notebook(): - from tqdm.notebook import tqdm - else: - from tqdm import tqdm -except ImportError: - from ..utils.misc import tqdm_nop as tqdm - class KRR(BaseIO): """Kernel Ridge Regression model. Only supports sparse GPR @@ -241,7 +233,6 @@ def get_representation_calculator(self): return self.kernel._rep -# TODO(max, felix) I think this belongs in utils; it's not KRR-specific def _get_kernel_strides(frames): """Get strides for total-energy/gradient kernels of the given structures @@ -256,11 +247,11 @@ def _get_kernel_strides(frames): Returns ------- - (1) + int the number of structures - (2) + int the number of gradient entries (== 3 * the total number of atoms) - (3) + np.array(int) strides for assigning the gradient entries for each structure """ Nstructures = len(frames) @@ -274,7 +265,7 @@ def _get_kernel_strides(frames): return Nstructures, Ngrads, Ngrad_stride -def compute_kernel_single(i_frame, frame, representation, X_sparse, kernel): +def _compute_kernel_single(i_frame, frame, representation, X_sparse, kernel): """Compute GAP kernel of the (new) structure against the sparse points Parameters @@ -321,18 +312,38 @@ def compute_KNM(frames, X_sparse, kernel, soap): ------- K_NM: np.array Summed total-energy kernel stacked with the atom-position gradient of the kernel + + Notes + ----- + This function 
can take quite a long time to run. To get a progress bar, + you can wrap the `frames` parameter in a [tqdm]_ object like this: + + .. code-block:: python + + from tqdm.notebook import tqdm # for Jupyter + #from tqdm import tqdm # on the command line + K_NM = compute_KNM( + tqdm(frames, desc="compute KNM", leave=False), + X_sparse, + kernel, + soap + ) + + .. [tqdm] https://github.com/tqdm/tqdm """ - Nstructures, Ngrads, Ngrad_stride = _get_kernel_strides(frames) + # If frames has been wrapped in a tqdm, use the underlying iterable + # so as not to "use up" the progress bar prematurely + if hasattr(frames, "iterable"): + Nstructures, Ngrads, Ngrad_stride = _get_kernel_strides(frames.iterable) + else: + Nstructures, Ngrads, Ngrad_stride = _get_kernel_strides(frames) KNM = np.zeros((Nstructures + Ngrads, X_sparse.size())) - pbar = tqdm(frames, desc="compute KNM", leave=False) for i_frame, frame in enumerate(frames): - en_row, grad_rows = compute_kernel_single( + en_row, grad_rows = _compute_kernel_single( i_frame, frame, soap, X_sparse, kernel ) KNM[Ngrad_stride[i_frame] : Ngrad_stride[i_frame + 1]] = grad_rows KNM[i_frame] = en_row - pbar.update() - pbar.close() return KNM diff --git a/bindings/rascal/utils/__init__.py b/bindings/rascal/utils/__init__.py index 70cb05b88..115fcc4ec 100644 --- a/bindings/rascal/utils/__init__.py +++ b/bindings/rascal/utils/__init__.py @@ -7,7 +7,6 @@ dump_obj, load_obj, ) -from .misc import is_notebook # Warning potential dependency loop: FPS imports models, which imports KRR, # which imports this file again diff --git a/bindings/rascal/utils/misc.py b/bindings/rascal/utils/misc.py deleted file mode 100644 index 787407f2a..000000000 --- a/bindings/rascal/utils/misc.py +++ /dev/null @@ -1,39 +0,0 @@ -"""Miscellaneous useful utilities""" - -import logging - -LOGGER = logging.getLogger(__name__) - - -def is_notebook(): - """Is this being run inside an IPython Notebook?""" - from IPython import get_ipython - - try: - shell = 
get_ipython().__class__.__name__ - if shell == "ZMQInteractiveShell": - return True # Jupyter notebook or qtconsole - elif shell == "TerminalInteractiveShell": - return False # Terminal running IPython - else: - return False # Other type (?) - except NameError: - return False # Probably standard Python interpreter - - -class tqdm_nop: - """A simple no-op class to replace tqdm if it is not available""" - - def __init__(self, iterable, **kwargs): - LOGGER.warn("tqdm not available") - LOGGER.warn("(tried to call tqdm with args: {:s})".format(str(kwargs))) - self.iterable = iterable - - def __iter__(self): - return iter(self.iterable) - - def update(self): - pass - - def close(self): - pass diff --git a/docs/source/examples/zundel_IP.ipynb b/docs/source/examples/zundel_IP.ipynb index f937137c6..d910d4337 120000 --- a/docs/source/examples/zundel_IP.ipynb +++ b/docs/source/examples/zundel_IP.ipynb @@ -1 +1 @@ -../../../examples/iPi/zundel/zundel_IP.ipynb \ No newline at end of file +../../../examples/i-PI/zundel/zundel_IP.ipynb \ No newline at end of file diff --git a/docs/source/reference/python.rst b/docs/source/reference/python.rst index 959ba3470..0c2ba3876 100644 --- a/docs/source/reference/python.rst +++ b/docs/source/reference/python.rst @@ -37,8 +37,6 @@ Models .. autofunction:: rascal.models.compute_KNM -.. 
autofunction:: rascal.models.krr.compute_kernel_single - IO === diff --git a/examples/iPi/zundel/h5o2+.xyz b/examples/i-PI/zundel/h5o2+.xyz similarity index 100% rename from examples/iPi/zundel/h5o2+.xyz rename to examples/i-PI/zundel/h5o2+.xyz diff --git a/examples/iPi/zundel/input.xml b/examples/i-PI/zundel/input.xml similarity index 100% rename from examples/iPi/zundel/input.xml rename to examples/i-PI/zundel/input.xml diff --git a/examples/iPi/zundel/run.sh b/examples/i-PI/zundel/run.sh similarity index 67% rename from examples/iPi/zundel/run.sh rename to examples/i-PI/zundel/run.sh index f914174eb..fd7886b3b 100644 --- a/examples/iPi/zundel/run.sh +++ b/examples/i-PI/zundel/run.sh @@ -3,9 +3,11 @@ # remove the driver file if it already exists rm /tmp/ipi_zundel # make sure rascal can be imported if not installed -export PYTHONPATH="../../../build_b/:$PYTHONPATH" +#export PYTHONPATH="../../../build/:$PYTHONPATH" # path to the i-Pi driver -RASCAL_DRIVER="../../../../i-pi/drivers/py/driver.py" +#RASCAL_DRIVER="../../../../i-pi/drivers/py/driver.py" +# (or add it to your PATH and use the below:) +RASCAL_DRIVER="driver.py" # i-Pi executable IPI="i-pi" diff --git a/examples/iPi/zundel/zundel_IP.ipynb b/examples/i-PI/zundel/zundel_IP.ipynb similarity index 90% rename from examples/iPi/zundel/zundel_IP.ipynb rename to examples/i-PI/zundel/zundel_IP.ipynb index 6b6c7a684..f8032bb8f 100644 --- a/examples/iPi/zundel/zundel_IP.ipynb +++ b/examples/i-PI/zundel/zundel_IP.ipynb @@ -4,14 +4,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# A Gaussian-Approximation potential for Zundel cations" + "# A Gaussian approximation potential (GAP) for the Zundel cation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The present notebook is meant to give you an overview of the main ingredients that you need to build an interatomic potential with Librascal. 
We will then use it in connection with i-Pi (https://github.com/cosmo-epfl/i-pi) to generate Molecular Dynamics trajectories of the system of interest. \n", + "The present notebook is meant to give you an overview of the main ingredients that you need to build an interatomic potential with `librascal` and use it in connection with i-Pi (https://github.com/cosmo-epfl/i-pi) to generate molecular dynamics (MD) trajectories of the system of interest. \n", "We will start from building a GAP model for Zundel cations ($H_5O_2+$), using a training set obtained via Bowman PES sampling, calculate its RMSE on a test set to check its performance and run a short NVT simulation at $\\text{T} = 250\\,\\text{K}$. \n", "\n", "The mathematical framework that we are going to use is the kernel-GAP fitting method, using both total energies and atomic forces as target properties. Basically the GAP-model total energy of a zundel molecule is computed using the following expression: \n", @@ -26,8 +26,12 @@ " K(\\bf{d_i} , \\bf{d_s} ) \\propto \\left| \\bf{d_i} \\cdot \\bf{d_s} \\right|^{\\zeta} \n", "$$\n", "\n", - "Finally $\\alpha_s$ represents the weights of each sparse environment, to be determined using Kernel-Ridge Regression (KRR).\n", - "For extensive details on the SOAP GAP-model fitting procedure and interesting physical applications, we invite the reader to refer to...(link to the chemrev Review, once available). Details on the implementation in Librascal instead are given in https://cosmo-epfl.github.io/librascal/SOAP.html." + "Finally $\\alpha_s$ represents the weights of each sparse environment, to be determined using Kernel-Ridge Regression (KRR). For extensive details on the SOAP GAP-model fitting procedure and interesting physical applications, we invite the reader to refer to the book chapter [1].\n", + "Details on the implementation in `librascal` are instead given in the companion publication [2].\n", + "\n", + "[1]: M. Ceriotti, M.J. Willatt, and G. 
Csányi, in Handbook of Materials Modeling (Springer International Publishing, Cham, 2018), pp. 1–27. https://doi.org/10.1007/978-3-319-42913-7_68-1\n", + "\n", + "[2]: F. Musil, M. Veit, A. Goscinski, G. Fraux, M.J. Willatt, M. Stricker, T. Junge, and M. Ceriotti, J. Chem. Phys. 154, 114109 (2021). https://doi.org/10.1063/5.0044689" ] }, { @@ -41,16 +45,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In order to be able to fit a potential with Librascal (the model evaluator) and interface it with i-Pi (the MD engine) we first need to have both softwares available and correctly installed. To enable communication between them, two specific branches have been designed: \n", - "```\n", - "pip install git+https://github.com/cosmo-epfl/i-pi.git@feat/librascal\n", + "In order to be able to fit a potential with `librascal` (the model evaluator) and interface it with i-Pi (the MD engine) we first need to have both softwares available and correctly installed. For `librascal`, the following should suffice:\n", + "```bash\n", + "$ pip install git+https://github.com/cosmo-epfl/librascal.git\n", "```\n", - "Make sure that you git pull these branches after cloning the two github repositories.\n", "\n", - "Next step is installing Librascal using the instructions of the top-level README which could be:\n", + "For i-PI, you need to use a special branch (at least, until [PR #171](https://github.com/i-pi/i-pi/pull/171) is merged):\n", + "```bash\n", + "$ pip install git+https://github.com/cosmo-epfl/i-pi.git@feat/librascal\n", "```\n", - "pip install .\n", - "```\n" + "If these steps don't work, try looking at the `README.rst` files of the respective repositories for further instructions." ] }, { @@ -64,15 +68,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let us start from the import of all the necessary modules. This includes the basic modules of Librascal to compute: \n", + "Let us start from the import of all the necessary modules and classes. 
In `librascal`, these are: \n", "\n", - "1) the SOAP descriptors of a structure, by means the $\\bf{SphericalInvariants}$ class;\n", + "1) the SOAP descriptors of a structure, by means of the `SphericalInvariants` class;\n", "\n", - "2) the kernels between a set of environments and the sparse set (the $\\bf{Kernel}$ class);\n", + "2) the kernels between a set of environments and the sparse set (the `Kernel` class);\n", "\n", - "3) the GAP model itsself, which is saved as an object of the $\\bf{KRR}$ class. The predict method of the same class will allow us to give predictions to new unseen structures. \n", + "3) the GAP model itself, which is saved as an object of the `KRR` class. The predict method of the same class will allow us to give predictions to new unseen structures. \n", "\n", - "NOTE: it is supposed that the input format for the training set and the test set is an ASE-extended .xyz file, which is then converted into an array of ASE Atoms objects, each one corresponding to a specific structure. Librascal uses these frames to compute the representations and predict the properties of new structures. In this example on zundel cations, the target energies to build the model with are reported in the $\\text{zundel_energies.txt}$ file, while the atomic forces are additional columns of the xyz. \n", + "NOTE: it is assumed that the input format for the training set and the test set is an ASE-compatible (e.g. extended-XYZ) file, which is then converted into an array of ASE Atoms objects, each one corresponding to a specific structure. `librascal` uses these frames to compute the representations and predict the properties of new structures. In this example on zundel cations, the target energies to build the model with are reported in the `zundel_energies.txt` file, while the atomic forces are additional columns of the xyz. \n", "\n", "Alternatively, global target properties can be reported in the header line following the ASE format." 
] @@ -100,6 +104,7 @@ "from ase.build import make_supercell\n", "from ase.visualize import view\n", "import numpy as np\n", + "# If installed -- not essential, though\n", "from tqdm.notebook import tqdm\n", "\n", "from time import time\n", @@ -116,7 +121,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In the snippets below we extract the relevant properties for each ASE frame. We load the total potential energies and we use the $\\bf{ASE.Atoms.arrays}$ methods to get the atomic forces. For more information on how to use ASE-related methods, check the documentation at https://wiki.fysik.dtu.dk/ase/ase/atoms.html. " + "In the snippets below we extract the relevant properties for each ASE frame. We load the total potential energies and we use the `ASE.Atoms.arrays` methods to get the atomic forces. For more information on how to use ASE-related methods, check the [ASE Atoms documentation](https://wiki.fysik.dtu.dk/ase/ase/atoms.html)." ] }, { @@ -191,10 +196,10 @@ "# Number of structures to train the model with\n", "n = 800\n", "\n", - "global_species = []\n", + "global_species = set()\n", "for frame in frames:\n", - " global_species.extend(frame.get_atomic_numbers())\n", - "global_species = np.unique(global_species)\n", + " global_species.update(frame.get_atomic_numbers())\n", + "global_species = np.array(sorted(global_species))\n", "\n", "# Select randomly n structures for training the model\n", "ids = list(range(N_dataset))\n", @@ -232,13 +237,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now we proceed with the actual calculation of the SOAP vectors of our training set. We need to specify an hyperparameters dictionary, which Librascal uses to compute the structural features. The meaning of each single command and how to correctly set them is reported in https://cosmo-epfl.github.io/librascal/reference/python.html?highlight=soap#rascal.representations.SphericalInvariants. 
These hyperparameters can be used as default values, but a careful optimization of the interaction cutoff might be required in the case the material under investigation might present some mid- or long-range order. \n", - "\n", - "For the actual calculation of the SOAP features, we first create an object of the SphericalInvariants class, defined by its hyperparameters.The methods that we then need to use are $\\textbf{transform()}$, which yields a second object called the $\\textbf{manager}$ containing the representation, while $\\textbf{get_features()}$ converts it into an $NxM$ matrix, $N$ being the number of atomic environments in the training set and M the number of features per each environment. \n", + "Now we proceed with the actual calculation of the SOAP vectors of our training set. We need to specify a dictionary of hyperparameters, which `librascal` uses to compute the structural features. The meaning of each hyperparameter and how to set it correctly is reported in [the `SphericalInvariants` documentation](https://cosmo-epfl.github.io/librascal/reference/python.html#rascal.representations.SphericalInvariants). These hyperparameters can be used as default values, but a careful optimization of the interaction cutoff might be required if the material under investigation presents some mid- or long-range order. \n", "\n", - "It is however important to stress that at this point the '$\\textbf{compute_gradients}$' hyper should be set to False. We will use in fact the SOAP representation of the training set structures to compute only the sparse set using the CUR decomposition. Setting it to 'True' would require Librascal to compute all the gradients of the SOAP representation w.r.t. atomic coordinates, thus making the structural managers unnecessarily memory expensive. 
We will instead compute later on the gradients of the sparse kernels using the $\\textbf{compute_KNM}$ method to fit the GAP model.\n", + "For the actual calculation of the SOAP features, we first create an object of the `SphericalInvariants` class, defined by its hyperparameters. The methods that we then need to use are `transform()`, which yields a second object called the `manager` containing the representation, while `get_features()` converts it into an $N \\times M$ matrix, $N$ being the number of atomic environments in the training set and $M$ the number of features per environment. \n", "\n", - "NOTE: the Librascal structure manager only works if the atoms have been preliminary wrapped within the cell provided in the input file. It might however give issues whenever atoms are too close to the cell boundary, so in the case you want to model a system with periodic boundary conditions, it is common practice to displace the atoms by some non-zero vector and then wrap them back using the $\\textbf{ase.wrap()}$ function." + "At this point, the `compute_gradients` hyperparameter should be set to `False`. For now, we will only use the SOAP representations (no gradients!) of the training set structures to select a sparse set using the CUR decomposition. Setting it to `True` would require `librascal` to compute all the gradients of the SOAP representation w.r.t. atomic coordinates, thus making the structure managers unnecessarily memory expensive. We will instead compute the gradients of the sparse kernels later on, using the `compute_KNM` method when we fit the GAP model." ] }, { @@ -280,7 +283,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The following snippet computes the descriptors and the managers for each frame, which will represent the expensive part of the calculation, in terms of memory usage." 
+ "The following snippet computes the descriptors and the managers for each frame, which will represent the expensive part of the calculation, in terms of memory usage.\n", + "\n", + "NOTE: the `librascal` structure manager only works if the atoms have been wrapped within the cell provided in the input file using e.g. `ase.Atoms.wrap()`, or one of the structure preprocessors provided in `rascal.neighbourlist.structure_manager` module (useful especially for non-periodic structures)." ] }, { @@ -291,12 +296,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "654db36a01594300a6d9d6f4b674193d", + "model_id": "94339c23f47e4080afd5a9dc25eecfa3", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(FloatProgress(value=0.0, max=800.0), HTML(value='')))" + "HBox(children=(IntProgress(value=0, max=800), HTML(value='')))" ] }, "metadata": {}, @@ -307,15 +312,13 @@ "output_type": "stream", "text": [ "\n", - "Execution: 0.5612576007843018 s\n" + "Execution: 0.20978784561157227 s\n" ] } ], "source": [ "managers = []\n", "for f in tqdm(frames_train):\n", - " positions = f.get_positions()\n", - " f.set_positions(positions+[1,1,1])\n", " f.wrap(eps=1e-18)\n", "\n", "start = time()\n", @@ -327,7 +330,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "At this point, we can define the $\\textbf{sparse set}$, i.e. the set of \"maximally diverse\" environments that will be used to perform the Kernel Ridge Regression. We can choose the number of sparse environments in Librascal per each atomic species, by defining a dictionary containing (atomic number, number of environments) pairs. The CURFilter class then filters out the most diverse environments according to the representation that we provide as the input object and by applying the CUR decomposition. " + "At this point, we can define the **sparse set**, i.e. 
the set of (ideally) \"maximally diverse\" environments that will be used as basis points of the kernel ridge regression (KRR, mostly equivalent to GAP) model. We can choose the number of sparse environments in `librascal` for each atomic species, by defining a dictionary containing (atomic number, number of environments) pairs. The `CURFilter` class then selects the most \"representative\" environments according to the representation that we provide as the input object and by applying the CUR decomposition [3].\n", + "\n", + "[3]: M.W. Mahoney and P. Drineas, PNAS 106, 697 (2009). https://doi.org/10.1073/pnas.0803205106" ] }, { @@ -360,11 +365,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As shown in eq. $(1)$, in order to fit a GAP potential we need to compute the kernels of all the training structure descriptors and the sparse set, as well as the gradients w.r.t. the atomic positions, if we wish to fit the atomic forces. We do so by using another key structure of librascal: the $\\textbf{Kernel}$ class. We first build a kernel object containing the representation (soap), which we then use to compute the sparse kernel matrix $K_{NM}$ between the training set and the sparse set and its gradients using the $\\textbf{compute_KNM}$ method.\n", "\n", - "Finally, we group everything together to build the GAP model, which basically uses the kernel matrices to regress the weights $\\{\\alpha_s\\}_{s=1}^M$ on each sparse environment. The lambdas represent the regularization parameters of the KRR (for both energies and forces), while jitter is a small parameter that enables its numerical convergence. \n", "\n", - "The output is a KRR object, which we can save as .json file for future use. For this last bit we use the $\\textbf{dump_obj}$ method (part of the io module). " + "As shown in eq. $(1)$, in order to fit a GAP potential we need to compute the kernels between all the training structure descriptors and the sparse set, as well as the gradients w.r.t. the atomic positions, if we wish to fit the atomic forces. We do so by using another key structure of `librascal`: the `Kernel` class. 
We first build a kernel object containing the representation (SOAP), which we then use to compute the sparse kernel matrix $K_{NM}$ between the training set and the sparse set and its gradients using the `compute_KNM` method.\n", "\n", - "Finally, we group everything together to build the GAP model, which basically uses the kernel matrices to regress the weights $\\{\\alpha_s\\}_{s=1}^M$ on each sparse environment. The lambdas represent the regularization parameters of the KRR (for both energies and forces), while jitter is a small parameter that enables its numerical convergence. \n", + "Finally, we group everything together to build the GAP model, which basically uses the kernel matrices to regress the weights $\\{\\alpha_s\\}_{s=1}^M$ on each sparse environment. The `lambdas` are the regularization parameters of the KRR (for both energies and forces), while `jitter` is a small parameter that enables its numerical convergence. \n", "\n", - "The output is a KRR object, which we can save as .json file for future use. For this last bit we use the $\\textbf{dump_obj}$ method (part of the io module). " + "The output is a KRR object, which we can save as .json file for future use. For this last bit we use the `dump_obj` method (part of the `rascal.utils.io` module)." 
] }, { @@ -375,12 +380,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "", + "model_id": "9a9de27d88974f0aa9d05d696d88d947", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(FloatProgress(value=0.0, description='compute KNM', max=800.0, style=ProgressStyle(description_…" + "HBox(children=(IntProgress(value=0, description='Computing kernel matrix', max=800, style=ProgressStyle(descri…" ] }, "metadata": {}, @@ -390,7 +395,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Execution: 47.59540414810181 s\n" + "\n", + "Execution: 6.271355152130127 s\n" ] } ], @@ -402,7 +408,7 @@ "soap = SphericalInvariants(**hypers)\n", "kernel = Kernel(soap, name='GAP', zeta=zeta, target_type='Structure', kernel_type='Sparse')\n", "\n", - "KNM = compute_KNM(frames_train, X_sparse, kernel, soap)\n", + "KNM = compute_KNM(tqdm(frames_train, leave=True, desc=\"Computing kernel matrix\"), X_sparse, kernel, soap)\n", "\n", "model = train_gap_model(kernel, frames_train, KNM, X_sparse, y_train, energy_baseline, \n", " grad_train=-f_train, lambdas=[1e-12, 1e-12], jitter=1e-13)\n", @@ -461,7 +467,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let us now compute the predictions on the test set, using the $\\textbf{predict}$ and $\\textbf{predict_forces}$ methods of the KRR class. At this point we need to compute the SOAP representation of the test set structures. It is important to stress however that this will not cause any issue regarding memory usage, because all we need is the predictions, so we can compute the managers of the test set structures one by one and calculate the predictions right away. Instead for the GAP fitting we need to store ALL the structural managers of the training set to perform the regression (which causes a large RAM usage)." + "Let us now compute the predictions on the test set, using the `predict()` and `predict_forces()` methods of the `KRR` class. 
At this point we need to compute the SOAP representation of the test set structures. It is important to stress however that this will not cause any issue regarding memory usage, because all we need is the predictions, so we can compute the managers of the test set structures one by one and calculate the predictions right away. Instead for the GAP fitting we need to store ALL the structure managers of the training set to perform the regression (which causes a large RAM usage)." ] }, { @@ -527,7 +533,7 @@ " \\text{RMSE} = \\sqrt{\\frac{1}{n_{\\text{test}}}\\sum_i (y_{\\text{pred}}^{i} - y_{\\text{test}}^{i})^2}\n", "$$\n", " \n", - "which we can compare to the standard deviation of the test set itsself to quantify how much the model captures the energy variations in the test set. The $\\% \\text{RMSE}$ of our model is about $5 \\%$ of the training set STD, which is sufficiently accurate to run MD safely. \n", + "which we can compare to the standard deviation of the test set itself to quantify how much the model captures the energy variations in the test set. The $\\% \\text{RMSE}$ of our model is about $5 \\%$ of the training set STD, which is sufficiently accurate to run MD safely. \n", "\n", "Finally we plot a \"correlation plot\", to observe how well the predictions on the test set correlate with the reference DFT-computed energies.\n", "\n", @@ -622,9 +628,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now we are going to use the fitted model to perform a simple NVT simulation at $\\text{T} = 250\\,$K using the i-Pi interface with librascal. For that we will use a communication socket run by i-Pi, which basically outputs a structure produced by the MD and gives it in input to Librascal. This will in turn return energies, forces and stresses by means of the $\\textbf{GenericMDCalculator}$ class of Librascal. 
\n", + "Now we are going to use the fitted model to perform a simple NVT simulation at $\\text{T} = 250\\,$K using the i-PI interface with `librascal`. For that we will use a communication socket run by i-PI, which basically outputs a structure produced by the MD and passes it as input to `librascal`. This will in turn return energies, forces and stresses by means of the `GenericMDCalculator` class of `librascal`. \n", "\n", - "The job itsself will generate a parent process (i-Pi) which contains information of the ensemble, the step needed for the time-integration, the thermostat characteristics, and all other trajectory-related infos. All these information are initially stored in an input.xml file as specified in the i-Pi documentation at https://github.com/cosmo-epfl/i-pi and given as inputs to i-Pi. The Librascal calculator is then launched as a child process and exchanges information with the MD driver. \n", + "The job itself will generate a parent process (i-PI) which contains information about the ensemble, the step needed for the time-integration, the thermostat characteristics, and all other trajectory-related information. All of this information is initially stored in an input.xml file as specified in the i-PI documentation at https://github.com/cosmo-epfl/i-pi and given as input to i-PI. The `librascal` calculator is then launched as a child process and exchanges information with the MD driver. \n", "\n", "The Librascal driver in i-Pi needs some input parameters, that can be given directly in the command line when the driver is called. To check the needed information, just use the --help option when calling it, as shown below.\n", "\n", @@ -635,7 +641,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Check the .xml file in this folder to quickly read the relevant information about the MD settings. Importantly, the $\\text{}$ tag gives you information about the physical properties that are printed out by i-Pi. 
This simulation gives as output a .out file containing the time-evolution of all the relevant physical quantities, while the .xc.xyz file contains the full trajectory, which you can later visualize with VMD. The file $\\textbf{h5o2+.xyz}$ is both used by i-Pi as a starting configuration of the trajectory and by Librascal as a template to load information about chemical species and number of atoms per species of the system.\n", + "Check the .xml file in this folder to quickly read the relevant information about the MD settings. Importantly, the `<properties>` tag gives you information about the physical properties that are printed out by i-PI. This simulation gives as output a .out file containing the time-evolution of all the relevant physical quantities, while the .xc.xyz file contains the full trajectory, which you can later visualize with VMD. The file `h5o2+.xyz` is used both by i-PI as a starting configuration of the trajectory and by `librascal` as a template to load information about chemical species and number of atoms per species of the system.\n", "\n", "As the simulation evolves you can plot some interesting physical properties, for instance the MD kinetic energy, the total potential energy and the pressure, and check for thermalization." 
] @@ -644,7 +650,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "An example script is provided to launch a MD simulation with i-Pi and the model that has just been fitted `zundel_model.json`:\n", + "An example script is provided to launch an MD simulation with i-PI and the model that has just been fitted (`zundel_model.json`). Note that you may need to edit some variables in the script, or set `$PATH` and `$PYTHONPATH` appropriately, in order to make it work on your system:\n", "\n", "```bash\n", "bash ./run.sh\n", diff --git a/examples/iPi/zundel/zundel_dataset.xyz b/examples/i-PI/zundel/zundel_dataset.xyz similarity index 100% rename from examples/iPi/zundel/zundel_dataset.xyz rename to examples/i-PI/zundel/zundel_dataset.xyz diff --git a/examples/iPi/zundel/zundel_energies.txt b/examples/i-PI/zundel/zundel_energies.txt similarity index 100% rename from examples/iPi/zundel/zundel_energies.txt rename to examples/i-PI/zundel/zundel_energies.txt