From bdf21dc43b6045c5d363893404f72a18e6a8d64a Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 20 Jun 2022 15:16:07 -0700
Subject: [PATCH 01/50] bump version

---
 nequip/_version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nequip/_version.py b/nequip/_version.py
index 91faf40b..b02164d2 100644
--- a/nequip/_version.py
+++ b/nequip/_version.py
@@ -2,4 +2,4 @@
 # See Python packaging guide
 # https://packaging.python.org/guides/single-sourcing-package-version/
 
-__version__ = "0.5.5"
+__version__ = "0.5.6"

From fd3a3f4bd2894757100833b6a900505bc642f481 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 20 Jun 2022 15:46:04 -0700
Subject: [PATCH 02/50] fix CI versions

---
 .github/workflows/tests.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index dea57ca1..b93d1ccd 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -15,8 +15,8 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.7, 3.9]
-        torch-version: [1.10.0, 1.11.0]
+        python-version: [3.9]
+        torch-version: [1.10.1, 1.11.0]
 
     steps:
     - uses: actions/checkout@v2

From 2a444e376fb1291a99dc272d92ea63562a146c8a Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 6 Sep 2022 17:56:15 -0400
Subject: [PATCH 03/50] Report num (trainable) weights

---
 CHANGELOG.md                | 2 ++
 nequip/scripts/benchmark.py | 4 ++++
 nequip/train/trainer.py     | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5f13a642..4d8bbe93 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,8 @@ Most recent change on the bottom.
 
 
 ## [Unreleased] - 0.5.6
+### Added
+- `nequip-benchmark` and `nequip-train` report number of weights and number of trainable weights
 
 ## [0.5.5] - 2022-06-20
 ### Added
diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py
index 579e60ea..e1ac63a9 100644
--- a/nequip/scripts/benchmark.py
+++ b/nequip/scripts/benchmark.py
@@ -126,6 +126,10 @@ def main(args=None):
     model = model_from_config(config, initialize=True, dataset=dataset)
     model_time = time.time() - model_time
     print(f"    building model took {model_time:.4f}s")
+    print(f"    model has {sum(p.numel() for p in model.parameters())} weights")
+    print(
+        f"    model has {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable weights"
+    )
     print("Compile...")
     # "Deploy" it
     model.eval()
diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py
index 7ce4d4e1..8a8f28ed 100644
--- a/nequip/train/trainer.py
+++ b/nequip/train/trainer.py
@@ -706,6 +706,9 @@ def init(self):
 
         self.num_weights = sum(p.numel() for p in self.model.parameters())
         self.logger.info(f"Number of weights: {self.num_weights}")
+        self.logger.info(
+            f"Number of trainable weights: {sum(p.numel() for p in self.model.parameters() if p.requires_grad)}"
+        )
 
         self.rescale_layers = []
         outer_layer = self.model

From b827bba56fd12e46cf14e581f37ac7e7a0b0d0c6 Mon Sep 17 00:00:00 2001
From: Lixin Sun <lixinsun@microsoft.com>
Date: Mon, 12 Sep 2022 23:55:03 +0100
Subject: [PATCH 04/50] make dataset an optional argument for rescaling builder

---
 nequip/model/_scaling.py | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/nequip/model/_scaling.py b/nequip/model/_scaling.py
index f5554d25..b3f0f4d0 100644
--- a/nequip/model/_scaling.py
+++ b/nequip/model/_scaling.py
@@ -12,7 +12,7 @@
 
 
 def RescaleEnergyEtc(
-    model: GraphModuleMixin, config, dataset: AtomicDataset, initialize: bool
+    model: GraphModuleMixin, config, initialize: bool, dataset: Optional[AtomicDataset] = None
 ):
     return GlobalRescale(
         model=model,
@@ -34,7 +34,6 @@ def RescaleEnergyEtc(
 def GlobalRescale(
     model: GraphModuleMixin,
     config,
-    dataset: AtomicDataset,
     initialize: bool,
     module_prefix: str,
     default_scale: Union[str, float, list],
@@ -43,6 +42,7 @@ def GlobalRescale(
     default_shift_keys: list,
     default_related_scale_keys: list,
     default_related_shift_keys: list,
+    dataset: Optional[AtomicDataset] = None,
 ):
     """Add global rescaling for energy(-based quantities).
 
@@ -75,11 +75,12 @@ def GlobalRescale(
                 raise ValueError(f"Invalid global scale `{value}`")
 
         # = Compute shifts and scales =
-        computed_stats = _compute_stats(
-            str_names=str_names,
-            dataset=dataset,
-            stride=config.dataset_statistics_stride,
-        )
+        if len(str_names) > 0:
+            computed_stats = _compute_stats(
+                str_names=str_names,
+                dataset=dataset,
+                stride=config.dataset_statistics_stride,
+            )
 
         if isinstance(global_scale, str):
             s = global_scale
@@ -129,8 +130,8 @@ def GlobalRescale(
 def PerSpeciesRescale(
     model: GraphModuleMixin,
     config,
-    dataset: AtomicDataset,
     initialize: bool,
+    dataset: Optional[AtomicDataset] = None,
 ):
     """Add global rescaling for energy(-based quantities).
 
@@ -199,12 +200,13 @@ def PerSpeciesRescale(
                 ], "Requested to set either the shifts or scales of the per_species_rescale using dataset values, but chose to provide the other in non-dataset units. Please give the explictly specified shifts/scales in dataset units and set per_species_rescale_arguments_in_dataset_units"
 
         # = Compute shifts and scales =
-        computed_stats = _compute_stats(
-            str_names=str_names,
-            dataset=dataset,
-            stride=config.dataset_statistics_stride,
-            kwargs=config.get(module_prefix + "_kwargs", {}),
-        )
+        if len(str_names) > 0:
+            computed_stats = _compute_stats(
+                str_names=str_names,
+                dataset=dataset,
+                stride=config.dataset_statistics_stride,
+                kwargs=config.get(module_prefix + "_kwargs", {}),
+            )
 
         if isinstance(scales, str):
             s = scales

From 8ad52e7fdf2f4d429e943ba69c8306dd06ede623 Mon Sep 17 00:00:00 2001
From: Lixin Sun <lixinsun@microsoft.com>
Date: Mon, 12 Sep 2022 23:57:32 +0100
Subject: [PATCH 05/50] black

---
 nequip/model/_scaling.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/nequip/model/_scaling.py b/nequip/model/_scaling.py
index b3f0f4d0..8a7ffa46 100644
--- a/nequip/model/_scaling.py
+++ b/nequip/model/_scaling.py
@@ -12,7 +12,10 @@
 
 
 def RescaleEnergyEtc(
-    model: GraphModuleMixin, config, initialize: bool, dataset: Optional[AtomicDataset] = None
+    model: GraphModuleMixin,
+    config,
+    initialize: bool,
+    dataset: Optional[AtomicDataset] = None,
 ):
     return GlobalRescale(
         model=model,

From 4b2ef8132e46cb9d5c9828160b3477f821da6149 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 13 Sep 2022 14:13:38 -0400
Subject: [PATCH 06/50] document `model_builders` in `full.yaml`

---
 configs/full.yaml | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/configs/full.yaml b/configs/full.yaml
index 1b8a3a2c..2a44d981 100644
--- a/configs/full.yaml
+++ b/configs/full.yaml
@@ -17,7 +17,19 @@ default_dtype: float32
 allow_tf32: false                                                                 # whether to use TensorFloat32 if it is available
 # device:  cuda                                                                   # which device to use. Default: automatically detected cuda or "cpu"
 
-# network
+# == network ==
+
+# `model_builders` defines a series of functions that will be called to construct the model
+# each model builder has the opportunity to update the model, the config, or both
+# model builders from other packages are allowed (see mir-group/allegro for an example); those from `nequip.model` don't require a prefix
+# these are the default model builders:
+model_builders:
+ - SimpleIrrepsConfig         # update the config with all the irreps for the network if using the simplified `l_max` / `num_features` / `parity` syntax
+ - EnergyModel                # build a full NequIP model
+ - PerSpeciesRescale          # add per-atom / per-species scaling and shifting to the NequIP model before the total energy sum
+ - ForceOutput                # wrap the energy model in a module that uses autodifferention to compute the forces
+ - RescaleEnergyEtc           # wrap the entire model in the appropriate global rescaling of the energy, forces, etc.
+
 r_max: 4.0                                                                        # cutoff radius in length units, here Angstrom, this is an important hyperparamter to scan
 num_layers: 4                                                                     # number of interaction blocks, we find 3-5 to work best
 

From 086d72422b4e15959fea1da54f8a2765dd91d655 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 19 Sep 2022 23:07:22 -0400
Subject: [PATCH 07/50] better message

---
 nequip/scripts/evaluate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py
index d67d750f..7c4e2abc 100644
--- a/nequip/scripts/evaluate.py
+++ b/nequip/scripts/evaluate.py
@@ -74,7 +74,7 @@ def main(args=None, running_as_script: bool = True):
     )
     parser.add_argument(
         "--batch-size",
-        help="Batch size to use. Larger is usually faster on GPU. If you run out of memory, lower this.",
+        help="Batch size to use. Larger is usually faster on GPU. If you run out of memory, lower this. You can also try to raise this for faster evaluation. Default: 50.",
         type=int,
         default=50,
     )

From 7da5b61d8b68194666e2952450fddbbcf5ba783b Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 17 Oct 2022 13:34:57 -0400
Subject: [PATCH 08/50] print dataset size

---
 nequip/data/dataset.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py
index 847b3795..2b2279d9 100644
--- a/nequip/data/dataset.py
+++ b/nequip/data/dataset.py
@@ -295,7 +295,13 @@ def process(self):
         # type conversion
         _process_dict(fixed_fields, ignore_fields=["r_max"])
 
-        logging.info(f"Loaded data: {data}")
+        total_MBs = sum(item.numel() * item.element_size() for _, item in data) / (
+            1024 * 1024
+        )
+        logging.info(
+            f"Loaded data: {data}\n    processed data size: ~{total_MBs:.2f} MB"
+        )
+        del total_MBs
 
         # use atomic writes to avoid race conditions between
         # different trainings that use the same dataset

From 17cb6f201f3a4e821627d0f8365664a2d548935c Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 17 Oct 2022 13:54:28 -0400
Subject: [PATCH 09/50] docs note

---
 configs/full.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configs/full.yaml b/configs/full.yaml
index 2a44d981..3c3e8984 100644
--- a/configs/full.yaml
+++ b/configs/full.yaml
@@ -29,6 +29,7 @@ model_builders:
  - PerSpeciesRescale          # add per-atom / per-species scaling and shifting to the NequIP model before the total energy sum
  - ForceOutput                # wrap the energy model in a module that uses autodifferention to compute the forces
  - RescaleEnergyEtc           # wrap the entire model in the appropriate global rescaling of the energy, forces, etc.
+#   ^ global rescaling blocks must always go last!
 
 r_max: 4.0                                                                        # cutoff radius in length units, here Angstrom, this is an important hyperparamter to scan
 num_layers: 4                                                                     # number of interaction blocks, we find 3-5 to work best

From da3e4bdf7ce9a25a1427f0efd15504dee71964df Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Sun, 6 Nov 2022 11:54:38 -0500
Subject: [PATCH 10/50] better error message

---
 nequip/data/AtomicData.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py
index 3f2e348b..f5b8339a 100644
--- a/nequip/data/AtomicData.py
+++ b/nequip/data/AtomicData.py
@@ -773,7 +773,7 @@ def neighbor_list_and_relative_vec(
         keep_edge = ~bad_edge
         if not np.any(keep_edge):
             raise ValueError(
-                "After eliminating self edges, no edges remain in this system."
+                f"Every single atom has no neighbors within the cutoff r_max={r_max} (after eliminating self edges, no edges remain in this system)"
             )
         first_idex = first_idex[keep_edge]
         second_idex = second_idex[keep_edge]

From bb2a1204c972cca9daed467a56236dc54aaf677b Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 7 Nov 2022 16:08:56 -0500
Subject: [PATCH 11/50] Fix error message typo

---
 nequip/train/trainer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py
index 8a8f28ed..55efec32 100644
--- a/nequip/train/trainer.py
+++ b/nequip/train/trainer.py
@@ -1180,7 +1180,9 @@ def set_dataset(
                 if self.n_train > len(dataset):
                     raise ValueError("Not enough data in dataset for requested n_train")
                 if self.n_val > len(validation_dataset):
-                    raise ValueError("Not enough data in dataset for requested n_train")
+                    raise ValueError(
+                        "Not enough data in validation dataset for requested n_val"
+                    )
                 if self.train_val_split == "random":
                     self.train_idcs = torch.randperm(
                         len(dataset), generator=self.dataset_rng

From d090f3224b0150cd878dde62d6a03e8c01033f0c Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 7 Nov 2022 16:18:39 -0500
Subject: [PATCH 12/50] avoid running `git` on `.egg`

---
 CHANGELOG.md        | 3 +++
 nequip/utils/git.py | 9 ++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4d8bbe93..85e07593 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,9 @@ Most recent change on the bottom.
 ### Added
 - `nequip-benchmark` and `nequip-train` report number of weights and number of trainable weights
 
+### Fixed
+- Git utilities when installed as ZIPed `.egg` (#264)
+
 ## [0.5.5] - 2022-06-20
 ### Added
 - BETA! Support for stress in training and inference
diff --git a/nequip/utils/git.py b/nequip/utils/git.py
index a78a87fc..a5fbe7f3 100644
--- a/nequip/utils/git.py
+++ b/nequip/utils/git.py
@@ -8,7 +8,14 @@
 def get_commit(module: str) -> Optional[str]:
 
     module = import_module(module)
-    path = str(Path(module.__file__).parents[0] / "..")
+    package = Path(module.__file__).parents[0]
+    if package.is_file():
+        # We're installed as a ZIP .egg file,
+        # which means there's no git information
+        # and looking for the parent would fail anyway
+        # https://github.com/mir-group/nequip/issues/264
+        return None
+    path = str(package / "..")
 
     retcode = subprocess.run(
         "git show --oneline --abbrev=40 -s".split(),

From 144139f5d527ee7eab6ddeddf8fcc91b18a65a8f Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 14 Nov 2022 23:42:33 -0500
Subject: [PATCH 13/50] always output something for stress

---
 nequip/nn/_grad_output.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nequip/nn/_grad_output.py b/nequip/nn/_grad_output.py
index ffc13140..673f8ff0 100644
--- a/nequip/nn/_grad_output.py
+++ b/nequip/nn/_grad_output.py
@@ -315,10 +315,10 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type:
                 torch.cross(cell[:, 1, :], cell[:, 2, :], dim=1),
             ).unsqueeze(-1)
             stress = virial / volume.view(-1, 1, 1)
-            data[AtomicDataDict.STRESS_KEY] = stress
             data[AtomicDataDict.CELL_KEY] = orig_cell
         else:
             stress = self._empty  # torchscript
+        data[AtomicDataDict.STRESS_KEY] = stress
 
         # see discussion in https://github.com/libAtoms/QUIP/issues/227 about sign convention
         # they say the standard convention is virial = -stress x volume

From e644c0d46cf7bb0b206ebda5de1fba4ac2a71494 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 14 Nov 2022 23:46:27 -0500
Subject: [PATCH 14/50] add EDGE_FEATURES

---
 nequip/data/AtomicData.py | 1 +
 nequip/data/_keys.py      | 1 +
 2 files changed, 2 insertions(+)

diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py
index f5b8339a..728c260b 100644
--- a/nequip/data/AtomicData.py
+++ b/nequip/data/AtomicData.py
@@ -48,6 +48,7 @@
     AtomicDataDict.EDGE_LENGTH_KEY,
     AtomicDataDict.EDGE_ATTRS_KEY,
     AtomicDataDict.EDGE_EMBEDDING_KEY,
+    AtomicDataDict.EDGE_FEATURES_KEY,
 }
 _DEFAULT_GRAPH_FIELDS: Set[str] = {
     AtomicDataDict.TOTAL_ENERGY_KEY,
diff --git a/nequip/data/_keys.py b/nequip/data/_keys.py
index c0535edd..54b66ce3 100644
--- a/nequip/data/_keys.py
+++ b/nequip/data/_keys.py
@@ -44,6 +44,7 @@
 EDGE_ATTRS_KEY: Final[str] = "edge_attrs"
 # [n_edge, dim] invariant embedding of the edges
 EDGE_EMBEDDING_KEY: Final[str] = "edge_embedding"
+EDGE_FEATURES_KEY: Final[str] = "edge_features"
 
 NODE_FEATURES_KEY: Final[str] = "node_features"
 NODE_ATTRS_KEY: Final[str] = "node_attrs"

From 3d44dcc9fd9b5c9db5b82c06b3cdb3c75310d768 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 15 Nov 2022 00:06:57 -0500
Subject: [PATCH 15/50] allow type mapper for GPU also

---
 nequip/data/transforms.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/nequip/data/transforms.py b/nequip/data/transforms.py
index f2c7ec32..5a37e2be 100644
--- a/nequip/data/transforms.py
+++ b/nequip/data/transforms.py
@@ -121,11 +121,13 @@ def transform(self, atomic_numbers):
                 f"Data included atomic numbers {bad_set} that are not part of the atomic number -> type mapping!"
             )
 
-        return self._Z_to_index[atomic_numbers - self._min_Z]
+        return self._Z_to_index[atomic_numbers - self._min_Z].to(
+            device=atomic_numbers.device
+        )
 
     def untransform(self, atom_types):
         """Transform atom types back into atomic numbers"""
-        return self._index_to_Z[atom_types]
+        return self._index_to_Z[atom_types].to(device=atom_types.device)
 
     @property
     def has_chemical_symbols(self) -> bool:

From c1096e842807e9330fb87a490326f0fa5a6bf1af Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 15 Nov 2022 00:07:37 -0500
Subject: [PATCH 16/50] refactor unit tests

---
 nequip/utils/unittests/__init__.py            |   3 +
 nequip/utils/unittests/conftest.py            | 137 +++++
 .../utils/unittests/model_tests.py            | 536 ++++++++----------
 tests/conftest.py                             | 140 +----
 tests/unit/model/test_nequip_model.py         | 118 ++++
 5 files changed, 486 insertions(+), 448 deletions(-)
 create mode 100644 nequip/utils/unittests/__init__.py
 create mode 100644 nequip/utils/unittests/conftest.py
 rename tests/unit/model/test_eng_force.py => nequip/utils/unittests/model_tests.py (58%)
 create mode 100644 tests/unit/model/test_nequip_model.py

diff --git a/nequip/utils/unittests/__init__.py b/nequip/utils/unittests/__init__.py
new file mode 100644
index 00000000..2309cb02
--- /dev/null
+++ b/nequip/utils/unittests/__init__.py
@@ -0,0 +1,3 @@
+import pathlib
+
+CONFTEST_PATH = pathlib.Path(__file__).parent / "conftest.py"
diff --git a/nequip/utils/unittests/conftest.py b/nequip/utils/unittests/conftest.py
new file mode 100644
index 00000000..060e5e7b
--- /dev/null
+++ b/nequip/utils/unittests/conftest.py
@@ -0,0 +1,137 @@
+from typing import List, Tuple
+import numpy as np
+import pathlib
+import pytest
+import tempfile
+import os
+
+from ase.atoms import Atoms
+from ase.build import molecule
+from ase.calculators.singlepoint import SinglePointCalculator
+from ase.io import write
+
+import torch
+
+from nequip.utils.test import set_irreps_debug
+from nequip.data import AtomicData, ASEDataset
+from nequip.data.transforms import TypeMapper
+from nequip.utils.torch_geometric import Batch
+from nequip.utils._global_options import _set_global_options
+from nequip.utils.misc import dtype_from_name
+
+if "NEQUIP_NUM_TASKS" not in os.environ:
+    # Test parallelization, but don't waste time spawning tons of workers if lots of cores available
+    os.environ["NEQUIP_NUM_TASKS"] = "2"
+
+# The default float tolerance
+FLOAT_TOLERANCE = {
+    t: torch.as_tensor(v, dtype=dtype_from_name(t))
+    for t, v in {"float32": 1e-3, "float64": 1e-10}.items()
+}
+
+
+@pytest.fixture(scope="session", autouse=True, params=["float32", "float64"])
+def float_tolerance(request):
+    """Run all tests with various PyTorch default dtypes.
+
+    This is a session-wide, autouse fixture — you only need to request it explicitly if a test needs to know the tolerance for the current default dtype.
+
+    Returns
+    --------
+        A precision threshold to use for closeness tests.
+    """
+    old_dtype = torch.get_default_dtype()
+    dtype = request.param
+    _set_global_options({"default_dtype": dtype})
+    yield FLOAT_TOLERANCE[dtype]
+    _set_global_options(
+        {
+            "default_dtype": {torch.float32: "float32", torch.float64: "float64"}[
+                old_dtype
+            ]
+        }
+    )
+
+
+# - Ampere and TF32 -
+# Many of the tests for NequIP involve numerically checking
+# algebraic properties— normalization, equivariance,
+# continuity, etc.
+# With the added numerical noise of TF32, some of those tests fail
+# with the current (and usually generous) thresholds.
+#
+# Thus we go on the assumption that PyTorch + NVIDIA got everything
+# right, that this setting DOES NOT AFFECT the model outputs except
+# for increased numerical noise, and only test without it.
+#
+# TODO: consider running tests with and without
+# TODO: check how much thresholds have to be changed to accommodate TF32
+torch.backends.cuda.matmul.allow_tf32 = False
+torch.backends.cudnn.allow_tf32 = False
+
+
+@pytest.fixture(scope="session")
+def BENCHMARK_ROOT():
+    return pathlib.Path(__file__).parent / "../benchmark_data/"
+
+
+@pytest.fixture(scope="session")
+def temp_data(float_tolerance):
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        yield tmpdirname
+
+
+@pytest.fixture(scope="session")
+def CH3CHO(CH3CHO_no_typemap) -> Tuple[Atoms, AtomicData]:
+    atoms, data = CH3CHO_no_typemap
+    tm = TypeMapper(chemical_symbol_to_type={"C": 0, "O": 1, "H": 2})
+    data = tm(data)
+    return atoms, data
+
+
+@pytest.fixture(scope="session")
+def CH3CHO_no_typemap(float_tolerance) -> Tuple[Atoms, AtomicData]:
+    atoms = molecule("CH3CHO")
+    data = AtomicData.from_ase(atoms, r_max=2.0)
+    return atoms, data
+
+
+@pytest.fixture(scope="session")
+def molecules() -> List[Atoms]:
+    atoms_list = []
+    for i in range(8):
+        atoms = molecule("CH3CHO" if i % 2 == 0 else "H2")
+        atoms.rattle()
+        atoms.calc = SinglePointCalculator(
+            energy=np.random.random(),
+            forces=np.random.random((len(atoms), 3)),
+            stress=None,
+            magmoms=None,
+            atoms=atoms,
+        )
+        atoms_list.append(atoms)
+    return atoms_list
+
+
+@pytest.fixture(scope="session")
+def nequip_dataset(molecules, temp_data, float_tolerance):
+    with tempfile.NamedTemporaryFile(suffix=".xyz") as fp:
+        for atoms in molecules:
+            write(fp.name, atoms, format="extxyz", append=True)
+        a = ASEDataset(
+            file_name=fp.name,
+            root=temp_data,
+            extra_fixed_fields={"r_max": 3.0},
+            ase_args=dict(format="extxyz"),
+            type_mapper=TypeMapper(chemical_symbol_to_type={"H": 0, "C": 1, "O": 2}),
+        )
+        yield a
+
+
+@pytest.fixture(scope="session")
+def atomic_batch(nequip_dataset):
+    return Batch.from_data_list([nequip_dataset[0], nequip_dataset[1]])
+
+
+# Use debug mode
+set_irreps_debug(True)
diff --git a/tests/unit/model/test_eng_force.py b/nequip/utils/unittests/model_tests.py
similarity index 58%
rename from tests/unit/model/test_eng_force.py
rename to nequip/utils/unittests/model_tests.py
index 0adcd4c9..372571d4 100644
--- a/tests/unit/model/test_eng_force.py
+++ b/nequip/utils/unittests/model_tests.py
@@ -1,150 +1,81 @@
 import pytest
 
-import logging
 import tempfile
 import functools
 import torch
 
 import numpy as np
 
-from e3nn import o3
 from e3nn.util.jit import script
 
-from nequip.data import AtomicDataDict, AtomicData, Collater
+from nequip.data import (
+    AtomicDataDict,
+    AtomicData,
+    Collater,
+    _GRAPH_FIELDS,
+    _NODE_FIELDS,
+)
 from nequip.data.transforms import TypeMapper
-from nequip.model import model_from_config, uniform_initialize_FCs
-from nequip.nn import GraphModuleMixin, AtomwiseLinear
+from nequip.model import model_from_config
+from nequip.nn import GraphModuleMixin
 from nequip.utils.test import assert_AtomicData_equivariant
 
 
-logging.basicConfig(level=logging.DEBUG)
-
-COMMON_CONFIG = {
-    "num_types": 3,
-    "types_names": ["H", "C", "O"],
-    "avg_num_neighbors": None,
-}
-r_max = 3
-minimal_config1 = dict(
-    irreps_edge_sh="0e + 1o",
-    r_max=4,
-    feature_irreps_hidden="4x0e + 4x1o",
-    num_layers=2,
-    num_basis=8,
-    PolynomialCutoff_p=6,
-    nonlinearity_type="norm",
-    **COMMON_CONFIG
-)
-minimal_config2 = dict(
-    irreps_edge_sh="0e + 1o",
-    r_max=4,
-    chemical_embedding_irreps_out="8x0e + 8x0o + 8x1e + 8x1o",
-    irreps_mid_output_block="2x0e",
-    feature_irreps_hidden="4x0e + 4x1o",
-    **COMMON_CONFIG
-)
-minimal_config3 = dict(
-    irreps_edge_sh="0e + 1o",
-    r_max=4,
-    feature_irreps_hidden="4x0e + 4x1o",
-    num_layers=2,
-    num_basis=8,
-    PolynomialCutoff_p=6,
-    nonlinearity_type="gate",
-    **COMMON_CONFIG
-)
-minimal_config4 = dict(
-    irreps_edge_sh="0e + 1o + 2e",
-    r_max=4,
-    feature_irreps_hidden="2x0e + 2x1o + 2x2e",
-    num_layers=2,
-    num_basis=3,
-    PolynomialCutoff_p=6,
-    nonlinearity_type="gate",
-    # test custom nonlinearities
-    nonlinearity_scalars={"e": "silu", "o": "tanh"},
-    nonlinearity_gates={"e": "silu", "o": "abs"},
-    **COMMON_CONFIG
-)
-
-
-@pytest.fixture(
-    scope="module",
-    params=[minimal_config1, minimal_config2, minimal_config3, minimal_config4],
-)
-def config(request):
-    return request.param
+# see https://github.com/pytest-dev/pytest/issues/421#issuecomment-943386533
+# to allow external packages to import tests through subclassing
+class BaseModelTests:
+    @pytest.fixture(scope="class")
+    def config(self):
+        """Implemented by subclasses.
 
+        Return a tuple of (config, out_fields), where out_fields is an iterable of output field names
+        """
+        raise NotImplementedError
 
-@pytest.fixture(
-    params=[
-        (
-            ["EnergyModel", "ForceOutput"],
-            AtomicDataDict.FORCE_KEY,
+    @pytest.fixture(
+        scope="class",
+        params=(
+            [torch.device("cuda"), torch.device("cpu")]
+            if torch.cuda.is_available()
+            else [torch.device("cpu")]
         ),
-        (
-            ["EnergyModel"],
-            AtomicDataDict.TOTAL_ENERGY_KEY,
-        ),
-        (
-            ["EnergyModel", "StressForceOutput"],
-            AtomicDataDict.STRESS_KEY,
-        ),
-    ]
-)
-def model(request, config):
-    torch.manual_seed(0)
-    np.random.seed(0)
-    builder, out_field = request.param
-    config = config.copy()
-    config["model_builders"] = builder
-    return model_from_config(config), out_field
-
-
-@pytest.fixture(
-    scope="module",
-    params=(
-        [torch.device("cuda"), torch.device("cpu")]
-        if torch.cuda.is_available()
-        else [torch.device("cpu")]
-    ),
-)
-def device(request):
-    return request.param
-
-
-class TestWorkflow:
-    """
-    test class methods
-    """
+    )
+    def device(self, request):
+        return request.param
+
+    @pytest.fixture(scope="class")
+    def model(self, config, device):
+        config, out_fields = config
+        torch.manual_seed(0)
+        np.random.seed(0)
+        config = config.copy()
+        config.update(
+            {
+                "num_types": 3,
+                "types_names": ["H", "C", "O"],
+            }
+        )
+        model = model_from_config(config)
+        model = model.to(device)
+        return model, out_fields
 
+    # == common tests for all models ==
     def test_init(self, model):
         instance, _ = model
         assert isinstance(instance, GraphModuleMixin)
 
-    def test_weight_init(self, model, atomic_batch, device):
-        instance, out_field = model
-        data = AtomicData.to_AtomicDataDict(atomic_batch.to(device=device))
-        instance = instance.to(device=device)
-
-        out_orig = instance(data)[out_field]
-
-        instance = uniform_initialize_FCs(instance, initialize=True)
-
-        out_unif = instance(data)[out_field]
-        assert not torch.allclose(out_orig, out_unif)
-
     def test_jit(self, model, atomic_batch, device):
-        instance, out_field = model
+        instance, out_fields = model
         data = AtomicData.to_AtomicDataDict(atomic_batch.to(device=device))
         instance = instance.to(device=device)
         model_script = script(instance)
 
-        assert torch.allclose(
-            instance(data)[out_field],
-            model_script(data)[out_field],
-            atol=1e-6,
-        )
+        for out_field in out_fields:
+            assert torch.allclose(
+                instance(data)[out_field],
+                model_script(data)[out_field],
+                atol=1e-6,
+            )
 
         # - Try saving, loading in another process, and running -
         with tempfile.TemporaryDirectory() as tmpdir:
@@ -163,39 +94,25 @@ def test_jit(self, model, atomic_batch, device):
                 torch.float64: 1e-10,
             }[torch.get_default_dtype()]
 
-            assert torch.allclose(
-                model_script(data)[out_field],
-                load_model(load_dat)[out_field],
-                atol=atol,
-            )
-
-    def test_submods(self):
-        config = minimal_config2.copy()
-        config["model_builders"] = ["EnergyModel"]
-        model = model_from_config(config=config, initialize=True)
-        assert isinstance(model.chemical_embedding, AtomwiseLinear)
-        true_irreps = o3.Irreps(minimal_config2["chemical_embedding_irreps_out"])
-        assert (
-            model.chemical_embedding.irreps_out[model.chemical_embedding.out_field]
-            == true_irreps
-        )
-        # Make sure it propagates
-        assert (
-            model.layer0_convnet.irreps_in[model.chemical_embedding.out_field]
-            == true_irreps
-        )
+            for out_field in out_fields:
+                assert torch.allclose(
+                    model_script(data)[out_field],
+                    load_model(load_dat)[out_field],
+                    atol=atol,
+                )
 
     def test_forward(self, model, atomic_batch, device):
-        instance, out_field = model
+        instance, out_fields = model
         instance.to(device)
         data = atomic_batch.to(device)
         output = instance(AtomicData.to_AtomicDataDict(data))
-        assert out_field in output
+        for out_field in out_fields:
+            assert out_field in output
 
     def test_batch(self, model, atomic_batch, device, float_tolerance):
         """Confirm that the results for individual examples are the same regardless of whether they are batched."""
         allclose = functools.partial(torch.allclose, atol=float_tolerance)
-        instance, out_field = model
+        instance, out_fields = model
         instance.to(device)
         data = atomic_batch.to(device)
         data1 = data.get_example(0)
@@ -203,33 +120,164 @@ def test_batch(self, model, atomic_batch, device, float_tolerance):
         output1 = instance(AtomicData.to_AtomicDataDict(data1))
         output2 = instance(AtomicData.to_AtomicDataDict(data2))
         output = instance(AtomicData.to_AtomicDataDict(data))
-        if out_field in (AtomicDataDict.TOTAL_ENERGY_KEY, AtomicDataDict.STRESS_KEY):
-            assert allclose(
-                output1[out_field],
-                output[out_field][0],
-            )
-            assert allclose(
-                output2[out_field],
-                output[out_field][1],
-            )
-        elif out_field in (AtomicDataDict.FORCE_KEY,):
-            assert allclose(
-                output1[out_field],
-                output[out_field][output[AtomicDataDict.BATCH_KEY] == 0],
-            )
-            assert allclose(
-                output2[out_field],
-                output[out_field][output[AtomicDataDict.BATCH_KEY] == 1],
-            )
+        for out_field in out_fields:
+            if out_field in _GRAPH_FIELDS:
+                assert allclose(
+                    output1[out_field],
+                    output[out_field][0],
+                )
+                assert allclose(
+                    output2[out_field],
+                    output[out_field][1],
+                )
+            elif out_field in _NODE_FIELDS:
+                assert allclose(
+                    output1[out_field],
+                    output[out_field][output[AtomicDataDict.BATCH_KEY] == 0],
+                )
+                assert allclose(
+                    output2[out_field],
+                    output[out_field][output[AtomicDataDict.BATCH_KEY] == 1],
+                )
+            else:
+                raise NotImplementedError
+
+    def test_equivariance(self, model, atomic_batch, device):
+        instance, out_fields = model
+        instance = instance.to(device=device)
+        atomic_batch = atomic_batch.to(device=device)
+        assert_AtomicData_equivariant(func=instance, data_in=atomic_batch)
+
+    def test_embedding_cutoff(self, model, config, device):
+        instance, out_fields = model
+        config, out_fields = config
+        r_max = config["r_max"]
+
+        # make a synthetic three atom example
+        data = AtomicData(
+            atom_types=np.random.choice([0, 1, 2], size=3),
+            pos=np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]),
+            edge_index=np.array([[0, 1, 0, 2], [1, 0, 2, 0]]),
+        )
+        data = data.to(device)
+        edge_embed = instance(AtomicData.to_AtomicDataDict(data))
+        if AtomicDataDict.EDGE_FEATURES_KEY in edge_embed:
+            key = AtomicDataDict.EDGE_FEATURES_KEY
         else:
-            raise NotImplementedError
+            key = AtomicDataDict.EDGE_EMBEDDING_KEY
+        edge_embed = edge_embed[key]
+        data.pos[2, 1] = r_max  # put it past the cutoff
+        edge_embed2 = instance(AtomicData.to_AtomicDataDict(data))[key]
 
+        assert torch.allclose(edge_embed[:2], edge_embed2[:2])
+        assert edge_embed[2:].abs().sum() > 1e-6  # some nonzero terms
+        assert torch.allclose(edge_embed2[2:], torch.zeros(1, device=device))
 
-class TestGradient:
-    def test_numeric_gradient(self, config, atomic_batch, device, float_tolerance):
-        config = config.copy()
-        config["model_builders"] = ["EnergyModel", "ForceOutput"]
-        model = model_from_config(config=config, initialize=True)
+        # test gradients
+        in_dict = AtomicData.to_AtomicDataDict(data)
+        in_dict[AtomicDataDict.POSITIONS_KEY].requires_grad_(True)
+
+        with torch.autograd.set_detect_anomaly(True):
+            out = instance(in_dict)
+
+            # is the edge embedding of the cutoff length edge unchanged at the cutoff?
+            grads = torch.autograd.grad(
+                outputs=out[key][2:].sum(),
+                inputs=in_dict[AtomicDataDict.POSITIONS_KEY],
+                retain_graph=True,
+            )[0]
+            assert torch.allclose(grads, torch.zeros(1, device=device))
+
+            if AtomicDataDict.PER_ATOM_ENERGY_KEY in out:
+                # are the first two atom's energies unaffected by atom at the cutoff?
+                grads = torch.autograd.grad(
+                    outputs=out[AtomicDataDict.PER_ATOM_ENERGY_KEY][:2].sum(),
+                    inputs=in_dict[AtomicDataDict.POSITIONS_KEY],
+                )[0]
+                print(grads)
+                # only care about gradient wrt moved atom
+                assert grads.shape == (3, 3)
+                assert torch.allclose(grads[2], torch.zeros(1, device=device))
+
+
+class BaseEnergyModelTests(BaseModelTests):
+    def test_large_separation(self, model, config, molecules, device):
+        atol = {torch.float32: 1e-4, torch.float64: 1e-10}[torch.get_default_dtype()]
+        instance, _ = model
+        instance.to(device)
+        config, out_fields = config
+        r_max = config["r_max"]
+        atoms1 = molecules[0].copy()
+        atoms2 = molecules[1].copy()
+        # translate atoms2 far away
+        atoms2.positions += 40.0 + np.random.randn(3)
+        atoms_both = atoms1.copy()
+        atoms_both.extend(atoms2)
+        tm = TypeMapper(chemical_symbols=["H", "C", "O"])
+        data1 = tm(AtomicData.from_ase(atoms1, r_max=r_max).to(device=device))
+        data2 = tm(AtomicData.from_ase(atoms2, r_max=r_max).to(device=device))
+        data_both = tm(AtomicData.from_ase(atoms_both, r_max=r_max).to(device=device))
+        assert (
+            data_both[AtomicDataDict.EDGE_INDEX_KEY].shape[1]
+            == data1[AtomicDataDict.EDGE_INDEX_KEY].shape[1]
+            + data2[AtomicDataDict.EDGE_INDEX_KEY].shape[1]
+        )
+
+        out1 = instance(AtomicData.to_AtomicDataDict(data1))
+        out2 = instance(AtomicData.to_AtomicDataDict(data2))
+        out_both = instance(AtomicData.to_AtomicDataDict(data_both))
+
+        assert torch.allclose(
+            out1[AtomicDataDict.TOTAL_ENERGY_KEY]
+            + out2[AtomicDataDict.TOTAL_ENERGY_KEY],
+            out_both[AtomicDataDict.TOTAL_ENERGY_KEY],
+            atol=atol,
+        )
+
+        atoms_both2 = atoms1.copy()
+        atoms3 = atoms2.copy()
+        atoms3.positions += np.random.randn(3)
+        atoms_both2.extend(atoms3)
+        data_both2 = tm(AtomicData.from_ase(atoms_both2, r_max=r_max).to(device=device))
+        out_both2 = instance(AtomicData.to_AtomicDataDict(data_both2))
+        assert torch.allclose(
+            out_both2[AtomicDataDict.TOTAL_ENERGY_KEY],
+            out_both[AtomicDataDict.TOTAL_ENERGY_KEY],
+            atol=atol,
+        )
+        assert torch.allclose(
+            out_both2[AtomicDataDict.PER_ATOM_ENERGY_KEY],
+            out_both[AtomicDataDict.PER_ATOM_ENERGY_KEY],
+            atol=atol,
+        )
+
+    def test_cross_frame_grad(self, model, device, nequip_dataset):
+        c = Collater.for_dataset(nequip_dataset)
+        batch = c([nequip_dataset[i] for i in range(len(nequip_dataset))])
+        energy_model, out_fields = model
+        energy_model.to(device)
+        data = AtomicData.to_AtomicDataDict(batch.to(device))
+        data[AtomicDataDict.POSITIONS_KEY].requires_grad = True
+
+        output = energy_model(data)
+        grads = torch.autograd.grad(
+            outputs=output[AtomicDataDict.TOTAL_ENERGY_KEY][-1],
+            inputs=data[AtomicDataDict.POSITIONS_KEY],
+            allow_unused=True,
+        )[0]
+
+        last_frame_n_atom = batch.ptr[-1] - batch.ptr[-2]
+
+        in_frame_grad = grads[-last_frame_n_atom:]
+        cross_frame_grad = grads[:-last_frame_n_atom]
+
+        assert cross_frame_grad.abs().max().item() == 0
+        assert in_frame_grad.abs().max().item() > 0
+
+    def test_numeric_gradient(self, model, atomic_batch, device):
+        model, out_fields = model
+        if AtomicDataDict.FORCE_KEY not in out_fields:
+            pytest.skip()
         model.to(device)
         data = atomic_batch.to(device)
         output = model(AtomicData.to_AtomicDataDict(data))
@@ -256,16 +304,15 @@ def test_numeric_gradient(self, config, atomic_batch, device, float_tolerance):
                 numeric, analytical, rtol=5e-2
             )
 
-    def test_partial_forces(self, atomic_batch, device):
-        config = minimal_config1.copy()
-        config["model_builders"] = [
-            "EnergyModel",
-            "ForceOutput",
-        ]
+    def test_partial_forces(self, config, atomic_batch, device):
+        config, out_fields = config
+        if "ForceOutput" not in config["model_builders"]:
+            pytest.skip()
+        config = config.copy()
         partial_config = config.copy()
         partial_config["model_builders"] = [
-            "EnergyModel",
-            "PartialForceOutput",
+            "PartialForceOutput" if b == "ForceOutput" else b
+            for b in partial_config["model_builders"]
         ]
         model = model_from_config(config=config, initialize=True)
         partial_model = model_from_config(config=partial_config, initialize=True)
@@ -284,7 +331,7 @@ def test_partial_forces(self, atomic_batch, device):
                 assert torch.allclose(
                     output[k],
                     output_partial[k],
-                    atol=1e-6 if k == AtomicDataDict.FORCE_KEY else 1e-8,
+                    atol=1e-8 if k == AtomicDataDict.TOTAL_ENERGY_KEY else 1e-6,
                 )
             else:
                 assert torch.equal(output[k], output_partial[k])
@@ -307,138 +354,3 @@ def test_partial_forces(self, atomic_batch, device):
                 AtomicDataDict.BATCH_KEY
             ].view(1, -1)
         assert torch.equal(adjacency, torch.any(partial_forces != 0, dim=-1))
-
-
-class TestAutoGradient:
-    def test_cross_frame_grad(self, config, nequip_dataset):
-        c = Collater.for_dataset(nequip_dataset)
-        batch = c([nequip_dataset[i] for i in range(len(nequip_dataset))])
-        device = "cpu"
-        config = config.copy()
-        config["model_builders"] = ["EnergyModel"]
-        energy_model = model_from_config(config=config, initialize=True)
-        energy_model.to(device)
-        data = AtomicData.to_AtomicDataDict(batch.to(device))
-        data[AtomicDataDict.POSITIONS_KEY].requires_grad = True
-
-        output = energy_model(data)
-        grads = torch.autograd.grad(
-            outputs=output[AtomicDataDict.TOTAL_ENERGY_KEY][-1],
-            inputs=data[AtomicDataDict.POSITIONS_KEY],
-            allow_unused=True,
-        )[0]
-
-        last_frame_n_atom = batch.ptr[-1] - batch.ptr[-2]
-
-        in_frame_grad = grads[-last_frame_n_atom:]
-        cross_frame_grad = grads[:-last_frame_n_atom]
-
-        assert cross_frame_grad.abs().max().item() == 0
-        assert in_frame_grad.abs().max().item() > 0
-
-
-class TestEquivariance:
-    def test_forward(self, model, atomic_batch, device):
-        instance, out_field = model
-        instance = instance.to(device=device)
-        atomic_batch = atomic_batch.to(device=device)
-        assert_AtomicData_equivariant(func=instance, data_in=atomic_batch)
-
-
-class TestCutoff:
-    def test_large_separation(self, model, config, molecules):
-        atol = {torch.float32: 1e-4, torch.float64: 1e-10}[torch.get_default_dtype()]
-        instance, _ = model
-        r_max = config["r_max"]
-        atoms1 = molecules[0].copy()
-        atoms2 = molecules[1].copy()
-        # translate atoms2 far away
-        atoms2.positions += 40.0 + np.random.randn(3)
-        atoms_both = atoms1.copy()
-        atoms_both.extend(atoms2)
-        tm = TypeMapper(chemical_symbols=["H", "C", "O"])
-        data1 = tm(AtomicData.from_ase(atoms1, r_max=r_max))
-        data2 = tm(AtomicData.from_ase(atoms2, r_max=r_max))
-        data_both = tm(AtomicData.from_ase(atoms_both, r_max=r_max))
-        assert (
-            data_both[AtomicDataDict.EDGE_INDEX_KEY].shape[1]
-            == data1[AtomicDataDict.EDGE_INDEX_KEY].shape[1]
-            + data2[AtomicDataDict.EDGE_INDEX_KEY].shape[1]
-        )
-
-        out1 = instance(AtomicData.to_AtomicDataDict(data1))
-        out2 = instance(AtomicData.to_AtomicDataDict(data2))
-        out_both = instance(AtomicData.to_AtomicDataDict(data_both))
-
-        assert torch.allclose(
-            out1[AtomicDataDict.TOTAL_ENERGY_KEY]
-            + out2[AtomicDataDict.TOTAL_ENERGY_KEY],
-            out_both[AtomicDataDict.TOTAL_ENERGY_KEY],
-            atol=atol,
-        )
-
-        atoms_both2 = atoms1.copy()
-        atoms3 = atoms2.copy()
-        atoms3.positions += np.random.randn(3)
-        atoms_both2.extend(atoms3)
-        data_both2 = tm(AtomicData.from_ase(atoms_both2, r_max=r_max))
-        out_both2 = instance(AtomicData.to_AtomicDataDict(data_both2))
-        assert torch.allclose(
-            out_both2[AtomicDataDict.TOTAL_ENERGY_KEY],
-            out_both[AtomicDataDict.TOTAL_ENERGY_KEY],
-            atol=atol,
-        )
-        assert torch.allclose(
-            out_both2[AtomicDataDict.PER_ATOM_ENERGY_KEY],
-            out_both[AtomicDataDict.PER_ATOM_ENERGY_KEY],
-            atol=atol,
-        )
-
-    def test_embedding_cutoff(self, config):
-        config = config.copy()
-        config["model_builders"] = ["EnergyModel"]
-        instance = model_from_config(config=config, initialize=True)
-        r_max = config["r_max"]
-
-        # make a synthetic three atom example
-        data = AtomicData(
-            atom_types=np.random.choice([0, 1, 2], size=3),
-            pos=np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]),
-            edge_index=np.array([[0, 1, 0, 2], [1, 0, 2, 0]]),
-        )
-        edge_embed = instance(AtomicData.to_AtomicDataDict(data))[
-            AtomicDataDict.EDGE_EMBEDDING_KEY
-        ]
-        data.pos[2, 1] = r_max  # put it past the cutoff
-        edge_embed2 = instance(AtomicData.to_AtomicDataDict(data))[
-            AtomicDataDict.EDGE_EMBEDDING_KEY
-        ]
-
-        assert torch.allclose(edge_embed[:2], edge_embed2[:2])
-        assert edge_embed[2:].abs().sum() > 1e-6  # some nonzero terms
-        assert torch.allclose(edge_embed2[2:], torch.zeros(1))
-
-        # test gradients
-        in_dict = AtomicData.to_AtomicDataDict(data)
-        in_dict[AtomicDataDict.POSITIONS_KEY].requires_grad_(True)
-
-        with torch.autograd.set_detect_anomaly(True):
-            out = instance(in_dict)
-
-            # is the edge embedding of the cutoff length edge unchanged at the cutoff?
-            grads = torch.autograd.grad(
-                outputs=out[AtomicDataDict.EDGE_EMBEDDING_KEY][2:].sum(),
-                inputs=in_dict[AtomicDataDict.POSITIONS_KEY],
-                retain_graph=True,
-            )[0]
-            assert torch.allclose(grads, torch.zeros(1))
-
-            # are the first two atom's energies unaffected by atom at the cutoff?
-            grads = torch.autograd.grad(
-                outputs=out[AtomicDataDict.PER_ATOM_ENERGY_KEY][:2].sum(),
-                inputs=in_dict[AtomicDataDict.POSITIONS_KEY],
-            )[0]
-            print(grads)
-            # only care about gradient wrt moved atom
-            assert grads.shape == (3, 3)
-            assert torch.allclose(grads[2], torch.zeros(1))
diff --git a/tests/conftest.py b/tests/conftest.py
index 060e5e7b..e9719bcd 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,137 +1,5 @@
-from typing import List, Tuple
-import numpy as np
-import pathlib
-import pytest
-import tempfile
-import os
+from nequip.utils.unittests import CONFTEST_PATH
 
-from ase.atoms import Atoms
-from ase.build import molecule
-from ase.calculators.singlepoint import SinglePointCalculator
-from ase.io import write
-
-import torch
-
-from nequip.utils.test import set_irreps_debug
-from nequip.data import AtomicData, ASEDataset
-from nequip.data.transforms import TypeMapper
-from nequip.utils.torch_geometric import Batch
-from nequip.utils._global_options import _set_global_options
-from nequip.utils.misc import dtype_from_name
-
-if "NEQUIP_NUM_TASKS" not in os.environ:
-    # Test parallelization, but don't waste time spawning tons of workers if lots of cores available
-    os.environ["NEQUIP_NUM_TASKS"] = "2"
-
-# The default float tolerance
-FLOAT_TOLERANCE = {
-    t: torch.as_tensor(v, dtype=dtype_from_name(t))
-    for t, v in {"float32": 1e-3, "float64": 1e-10}.items()
-}
-
-
-@pytest.fixture(scope="session", autouse=True, params=["float32", "float64"])
-def float_tolerance(request):
-    """Run all tests with various PyTorch default dtypes.
-
-    This is a session-wide, autouse fixture — you only need to request it explicitly if a test needs to know the tolerance for the current default dtype.
-
-    Returns
-    --------
-        A precision threshold to use for closeness tests.
-    """
-    old_dtype = torch.get_default_dtype()
-    dtype = request.param
-    _set_global_options({"default_dtype": dtype})
-    yield FLOAT_TOLERANCE[dtype]
-    _set_global_options(
-        {
-            "default_dtype": {torch.float32: "float32", torch.float64: "float64"}[
-                old_dtype
-            ]
-        }
-    )
-
-
-# - Ampere and TF32 -
-# Many of the tests for NequIP involve numerically checking
-# algebraic properties— normalization, equivariance,
-# continuity, etc.
-# With the added numerical noise of TF32, some of those tests fail
-# with the current (and usually generous) thresholds.
-#
-# Thus we go on the assumption that PyTorch + NVIDIA got everything
-# right, that this setting DOES NOT AFFECT the model outputs except
-# for increased numerical noise, and only test without it.
-#
-# TODO: consider running tests with and without
-# TODO: check how much thresholds have to be changed to accomidate TF32
-torch.backends.cuda.matmul.allow_tf32 = False
-torch.backends.cudnn.allow_tf32 = False
-
-
-@pytest.fixture(scope="session")
-def BENCHMARK_ROOT():
-    return pathlib.Path(__file__).parent / "../benchmark_data/"
-
-
-@pytest.fixture(scope="session")
-def temp_data(float_tolerance):
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        yield tmpdirname
-
-
-@pytest.fixture(scope="session")
-def CH3CHO(CH3CHO_no_typemap) -> Tuple[Atoms, AtomicData]:
-    atoms, data = CH3CHO_no_typemap
-    tm = TypeMapper(chemical_symbol_to_type={"C": 0, "O": 1, "H": 2})
-    data = tm(data)
-    return atoms, data
-
-
-@pytest.fixture(scope="session")
-def CH3CHO_no_typemap(float_tolerance) -> Tuple[Atoms, AtomicData]:
-    atoms = molecule("CH3CHO")
-    data = AtomicData.from_ase(atoms, r_max=2.0)
-    return atoms, data
-
-
-@pytest.fixture(scope="session")
-def molecules() -> List[Atoms]:
-    atoms_list = []
-    for i in range(8):
-        atoms = molecule("CH3CHO" if i % 2 == 0 else "H2")
-        atoms.rattle()
-        atoms.calc = SinglePointCalculator(
-            energy=np.random.random(),
-            forces=np.random.random((len(atoms), 3)),
-            stress=None,
-            magmoms=None,
-            atoms=atoms,
-        )
-        atoms_list.append(atoms)
-    return atoms_list
-
-
-@pytest.fixture(scope="session")
-def nequip_dataset(molecules, temp_data, float_tolerance):
-    with tempfile.NamedTemporaryFile(suffix=".xyz") as fp:
-        for atoms in molecules:
-            write(fp.name, atoms, format="extxyz", append=True)
-        a = ASEDataset(
-            file_name=fp.name,
-            root=temp_data,
-            extra_fixed_fields={"r_max": 3.0},
-            ase_args=dict(format="extxyz"),
-            type_mapper=TypeMapper(chemical_symbol_to_type={"H": 0, "C": 1, "O": 2}),
-        )
-        yield a
-
-
-@pytest.fixture(scope="session")
-def atomic_batch(nequip_dataset):
-    return Batch.from_data_list([nequip_dataset[0], nequip_dataset[1]])
-
-
-# Use debug mode
-set_irreps_debug(True)
+# like `source` in bash
+with open(CONFTEST_PATH) as f:
+    exec(f.read())
diff --git a/tests/unit/model/test_nequip_model.py b/tests/unit/model/test_nequip_model.py
new file mode 100644
index 00000000..20cf7064
--- /dev/null
+++ b/tests/unit/model/test_nequip_model.py
@@ -0,0 +1,118 @@
+import pytest
+
+from e3nn import o3
+
+from nequip.data import AtomicDataDict
+from nequip.model import model_from_config
+from nequip.nn import AtomwiseLinear
+from nequip.utils.unittests.model_tests import BaseEnergyModelTests
+
+COMMON_CONFIG = {
+    "avg_num_neighbors": None,
+    "num_types": 3,
+    "types_names": ["H", "C", "O"],
+}
+r_max = 3
+minimal_config1 = dict(
+    irreps_edge_sh="0e + 1o",
+    r_max=4,
+    feature_irreps_hidden="4x0e + 4x1o",
+    num_layers=2,
+    num_basis=8,
+    PolynomialCutoff_p=6,
+    nonlinearity_type="norm",
+    **COMMON_CONFIG
+)
+minimal_config2 = dict(
+    irreps_edge_sh="0e + 1o",
+    r_max=4,
+    chemical_embedding_irreps_out="8x0e + 8x0o + 8x1e + 8x1o",
+    irreps_mid_output_block="2x0e",
+    feature_irreps_hidden="4x0e + 4x1o",
+    **COMMON_CONFIG
+)
+minimal_config3 = dict(
+    irreps_edge_sh="0e + 1o",
+    r_max=4,
+    feature_irreps_hidden="4x0e + 4x1o",
+    num_layers=2,
+    num_basis=8,
+    PolynomialCutoff_p=6,
+    nonlinearity_type="gate",
+    **COMMON_CONFIG
+)
+minimal_config4 = dict(
+    irreps_edge_sh="0e + 1o + 2e",
+    r_max=4,
+    feature_irreps_hidden="2x0e + 2x1o + 2x2e",
+    num_layers=2,
+    num_basis=3,
+    PolynomialCutoff_p=6,
+    nonlinearity_type="gate",
+    # test custom nonlinearities
+    nonlinearity_scalars={"e": "silu", "o": "tanh"},
+    nonlinearity_gates={"e": "silu", "o": "abs"},
+    **COMMON_CONFIG
+)
+
+
+class TestNequIPModel(BaseEnergyModelTests):
+    @pytest.fixture(
+        params=[minimal_config1, minimal_config2, minimal_config3, minimal_config4],
+        scope="class",
+    )
+    def base_config(self, request):
+        return request.param
+
+    @pytest.fixture(
+        params=[
+            (
+                ["EnergyModel", "ForceOutput"],
+                [
+                    AtomicDataDict.TOTAL_ENERGY_KEY,
+                    AtomicDataDict.PER_ATOM_ENERGY_KEY,
+                    AtomicDataDict.FORCE_KEY,
+                ],
+            ),
+            (
+                ["EnergyModel"],
+                [
+                    AtomicDataDict.TOTAL_ENERGY_KEY,
+                    AtomicDataDict.PER_ATOM_ENERGY_KEY,
+                ],
+            ),
+            (
+                ["EnergyModel", "StressForceOutput"],
+                [
+                    AtomicDataDict.TOTAL_ENERGY_KEY,
+                    AtomicDataDict.PER_ATOM_ENERGY_KEY,
+                    AtomicDataDict.FORCE_KEY,
+                    AtomicDataDict.STRESS_KEY,
+                    AtomicDataDict.VIRIAL_KEY,
+                ],
+            ),
+        ],
+        scope="class",
+    )
+    def config(self, request, base_config):
+        config = base_config.copy()
+        builder, out_fields = request.param
+        config = config.copy()
+        config["model_builders"] = builder
+        return config, out_fields
+
+    def test_submods(self):
+        config = minimal_config2.copy()
+        config["model_builders"] = ["EnergyModel"]
+        model = model_from_config(config=config, initialize=True)
+        assert isinstance(model.chemical_embedding, AtomwiseLinear)
+        true_irreps = o3.Irreps(minimal_config2["chemical_embedding_irreps_out"])
+        assert (
+            model.chemical_embedding.irreps_out[model.chemical_embedding.out_field]
+            == true_irreps
+        )
+        # Make sure it propagates
+        assert (
+            model.layer0_convnet.irreps_in[model.chemical_embedding.out_field]
+            == true_irreps
+        )

From ad58349db4d6787a9802f2c52c9f2a34bd0135c6 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 15 Nov 2022 00:20:11 -0500
Subject: [PATCH 17/50] narrower test collection

---
 .github/workflows/tests.yml         | 2 +-
 .github/workflows/tests_develop.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index b93d1ccd..a6e9a8f0 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -44,4 +44,4 @@ jobs:
     - name: Test with pytest
       run: |
         # See https://github.com/pytest-dev/pytest/issues/1075
-        PYTHONHASHSEED=0 pytest -n auto --ignore=docs/ .
+        PYTHONHASHSEED=0 pytest -n auto tests/
diff --git a/.github/workflows/tests_develop.yml b/.github/workflows/tests_develop.yml
index bae5795e..e301f5e3 100644
--- a/.github/workflows/tests_develop.yml
+++ b/.github/workflows/tests_develop.yml
@@ -44,4 +44,4 @@ jobs:
     - name: Test with pytest
       run: |
         # See https://github.com/pytest-dev/pytest/issues/1075
-        PYTHONHASHSEED=0 pytest -n auto --ignore=docs/ .
+        PYTHONHASHSEED=0 pytest -n auto tests/

From e8868eb378945b6ea8d0dba9ec3df3de28306571 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 15 Nov 2022 00:42:17 -0500
Subject: [PATCH 18/50] test edge fields also for batching

---
 nequip/utils/unittests/model_tests.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py
index 372571d4..00f3c5bb 100644
--- a/nequip/utils/unittests/model_tests.py
+++ b/nequip/utils/unittests/model_tests.py
@@ -14,6 +14,7 @@
     Collater,
     _GRAPH_FIELDS,
     _NODE_FIELDS,
+    _EDGE_FIELDS,
 )
 from nequip.data.transforms import TypeMapper
 from nequip.model import model_from_config
@@ -139,6 +140,25 @@ def test_batch(self, model, atomic_batch, device, float_tolerance):
                     output2[out_field],
                     output[out_field][output[AtomicDataDict.BATCH_KEY] == 1],
                 )
+            elif out_field in _EDGE_FIELDS:
+                assert allclose(
+                    output1[out_field],
+                    output[out_field][
+                        output[AtomicDataDict.BATCH_KEY][
+                            output[AtomicDataDict.EDGE_INDEX_KEY][0]
+                        ]
+                        == 0
+                    ],
+                )
+                assert allclose(
+                    output2[out_field],
+                    output[out_field][
+                        output[AtomicDataDict.BATCH_KEY][
+                            output[AtomicDataDict.EDGE_INDEX_KEY][0]
+                        ]
+                        == 1
+                    ],
+                )
             else:
                 raise NotImplementedError
 

From 0a92126ef8bb5cfac5ec1f3b3dcf7b8b5e477f06 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 15 Nov 2022 00:55:21 -0500
Subject: [PATCH 19/50] fix tests for edge features

---
 nequip/utils/unittests/model_tests.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py
index 00f3c5bb..2fe951d9 100644
--- a/nequip/utils/unittests/model_tests.py
+++ b/nequip/utils/unittests/model_tests.py
@@ -189,7 +189,10 @@ def test_embedding_cutoff(self, model, config, device):
         data.pos[2, 1] = r_max  # put it past the cutoff
         edge_embed2 = instance(AtomicData.to_AtomicDataDict(data))[key]
 
-        assert torch.allclose(edge_embed[:2], edge_embed2[:2])
+        if key == AtomicDataDict.EDGE_EMBEDDING_KEY:
+            # we can only check that other edges are unaffected if we know it's an embedding
+            # For example, an Allegro edge feature is many body so will be affected
+            assert torch.allclose(edge_embed[:2], edge_embed2[:2])
         assert edge_embed[2:].abs().sum() > 1e-6  # some nonzero terms
         assert torch.allclose(edge_embed2[2:], torch.zeros(1, device=device))
 

From d26f0139caf52bb9ab5637acfbdf0a4cf2dc30c2 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 15 Nov 2022 01:04:46 -0500
Subject: [PATCH 20/50] fix partial forces test

---
 nequip/utils/unittests/model_tests.py | 12 ++++++------
 tests/unit/model/test_nequip_model.py |  4 ++++
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py
index 2fe951d9..e0d807bd 100644
--- a/nequip/utils/unittests/model_tests.py
+++ b/nequip/utils/unittests/model_tests.py
@@ -327,7 +327,7 @@ def test_numeric_gradient(self, model, atomic_batch, device):
                 numeric, analytical, rtol=5e-2
             )
 
-    def test_partial_forces(self, config, atomic_batch, device):
+    def test_partial_forces(self, config, atomic_batch, device, strict_locality):
         config, out_fields = config
         if "ForceOutput" not in config["model_builders"]:
             pytest.skip()
@@ -363,14 +363,14 @@ def test_partial_forces(self, config, atomic_batch, device):
         assert partial_forces.shape == (n_at, n_at, 3)
         # confirm that sparsity matches graph topology:
         edge_index = data[AtomicDataDict.EDGE_INDEX_KEY]
-        adjacency = torch.zeros(n_at, n_at, dtype=torch.bool)
-        strict_locality = False
+        adjacency = torch.zeros(
+            n_at, n_at, dtype=torch.bool, device=partial_forces.device
+        )
         if strict_locality:
             # only adjacent for nonzero deriv to neighbors
             adjacency[edge_index[0], edge_index[1]] = True
-            adjacency[
-                torch.arange(n_at), torch.arange(n_at)
-            ] = True  # diagonal is ofc True
+            arange = torch.arange(n_at, device=partial_forces.device)
+            adjacency[arange, arange] = True  # diagonal is ofc True
         else:
             # technically only adjacent to n-th degree neighbor, but in this tiny test system that is same as all-to-all and easier to program
             adjacency = data[AtomicDataDict.BATCH_KEY].view(-1, 1) == data[
diff --git a/tests/unit/model/test_nequip_model.py b/tests/unit/model/test_nequip_model.py
index 20cf7064..2aa82e15 100644
--- a/tests/unit/model/test_nequip_model.py
+++ b/tests/unit/model/test_nequip_model.py
@@ -57,6 +57,10 @@
 
 
 class TestNequIPModel(BaseEnergyModelTests):
+    @pytest.fixture
+    def strict_locality(self):
+        return False
+
     @pytest.fixture(
         params=[minimal_config1, minimal_config2, minimal_config3, minimal_config4],
         scope="class",

From 14c129e947bd677240f223cc3f1a366a8810d3b5 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 15 Nov 2022 17:29:58 -0500
Subject: [PATCH 21/50] add --no-compile

---
 CHANGELOG.md                |  1 +
 nequip/scripts/benchmark.py | 43 ++++++++++++++++++++++---------------
 2 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 85e07593..4e79bf8c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ Most recent change on the bottom.
 ## [Unreleased] - 0.5.6
 ### Added
 - `nequip-benchmark` and `nequip-train` report number of weights and number of trainable weights
+- `nequip-benchmark --no-compile`
 
 ### Fixed
 - Git utilities when installed as ZIPed `.egg` (#264)
diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py
index e1ac63a9..43480bac 100644
--- a/nequip/scripts/benchmark.py
+++ b/nequip/scripts/benchmark.py
@@ -55,6 +55,11 @@ def main(args=None):
         type=float,
         default=1,
     )
+    parser.add_argument(
+        "--no-compile",
+        help="Don't compile the model to TorchScript",
+        action="store_true",
+    )
 
     # TODO: option to show memory use
 
@@ -130,24 +135,28 @@ def main(args=None):
     print(
         f"    model has {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable weights"
     )
-    print("Compile...")
-    # "Deploy" it
+
     model.eval()
-    compile_time = time.time()
-    model = script(model)
-    model = _compile_for_deploy(model)
-    compile_time = time.time() - compile_time
-    print(f"    compilation took {compile_time:.4f}s")
-
-    # save and reload to avoid bugs
-    with tempfile.NamedTemporaryFile() as f:
-        torch.jit.save(model, f.name)
-        model = torch.jit.load(f.name, map_location=device)
-        # freeze like in the LAMMPS plugin
-        model = torch.jit.freeze(model)
-        # and reload again just to avoid bugs
-        torch.jit.save(model, f.name)
-        model = torch.jit.load(f.name, map_location=device)
+    if args.no_compile:
+        model = model.to(device)
+    else:
+        print("Compile...")
+        # "Deploy" it
+        compile_time = time.time()
+        model = script(model)
+        model = _compile_for_deploy(model)
+        compile_time = time.time() - compile_time
+        print(f"    compilation took {compile_time:.4f}s")
+
+        # save and reload to avoid bugs
+        with tempfile.NamedTemporaryFile() as f:
+            torch.jit.save(model, f.name)
+            model = torch.jit.load(f.name, map_location=device)
+            # freeze like in the LAMMPS plugin
+            model = torch.jit.freeze(model)
+            # and reload again just to avoid bugs
+            torch.jit.save(model, f.name)
+            model = torch.jit.load(f.name, map_location=device)
 
     # Make sure we're warm past compilation
     warmup = config["_jit_bailout_depth"] + 4  # just to be safe...

From 03142e1618ad5e09372df26a01890443039b5266 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 15 Nov 2022 19:02:39 -0500
Subject: [PATCH 22/50] verbose benchmark

---
 CHANGELOG.md                | 2 +-
 nequip/scripts/benchmark.py | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4e79bf8c..afe5f2bc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,7 +10,7 @@ Most recent change on the bottom.
 ## [Unreleased] - 0.5.6
 ### Added
 - `nequip-benchmark` and `nequip-train` report number of weights and number of trainable weights
-- `nequip-benchmark --no-compile`
+- `nequip-benchmark --no-compile` and `--verbose`
 
 ### Fixed
 - Git utilities when installed as ZIPed `.egg` (#264)
diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py
index 43480bac..d856c4fa 100644
--- a/nequip/scripts/benchmark.py
+++ b/nequip/scripts/benchmark.py
@@ -3,6 +3,8 @@
 import tempfile
 import itertools
 import time
+import logging
+import sys
 
 import torch
 from torch.utils.benchmark import Timer, Measurement
@@ -60,12 +62,19 @@ def main(args=None):
         help="Don't compile the model to TorchScript",
         action="store_true",
     )
+    parser.add_argument(
+        "--verbose", help="Logging verbosity level", type=str, default="error"
+    )
 
     # TODO: option to show memory use
 
     # Parse the args
     args = parser.parse_args(args=args)
 
+    root_logger = logging.getLogger()
+    root_logger.setLevel(getattr(logging, args.verbose.upper()))
+    root_logger.handlers = [logging.StreamHandler(sys.stderr)]
+
     if args.device is None:
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     else:

From 036b488e978122b1d4ecfd99416810b58ceb8381 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 15 Nov 2022 19:11:02 -0500
Subject: [PATCH 23/50] memory summary

---
 CHANGELOG.md                |  2 +-
 nequip/scripts/benchmark.py | 14 ++++++++++++--
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index afe5f2bc..be6b1fa6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,7 +10,7 @@ Most recent change on the bottom.
 ## [Unreleased] - 0.5.6
 ### Added
 - `nequip-benchmark` and `nequip-train` report number of weights and number of trainable weights
-- `nequip-benchmark --no-compile` and `--verbose`
+- `nequip-benchmark --no-compile` and `--verbose` and `--memory-summary`
 
 ### Fixed
 - Git utilities when installed as ZIPed `.egg` (#264)
diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py
index d856c4fa..ac3a5b94 100644
--- a/nequip/scripts/benchmark.py
+++ b/nequip/scripts/benchmark.py
@@ -62,12 +62,15 @@ def main(args=None):
         help="Don't compile the model to TorchScript",
         action="store_true",
     )
+    parser.add_argument(
+        "--memory-summary",
+        help="Print torch.cuda.memory_summary() after running the model",
+        action="store_true",
+    )
     parser.add_argument(
         "--verbose", help="Logging verbosity level", type=str, default="error"
     )
 
-    # TODO: option to show memory use
-
     # Parse the args
     args = parser.parse_args(args=args)
 
@@ -144,6 +147,9 @@ def main(args=None):
     print(
         f"    model has {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable weights"
     )
+    print(
+        f"    model weights and buffers take {sum(p.numel() * p.element_size() for p in itertools.chain(model.parameters(), model.buffers())) / (1024 * 1024):.2f} MB"
+    )
 
     model.eval()
     if args.no_compile:
@@ -205,6 +211,10 @@ def trace_handler(p):
         )
         perloop: Measurement = t.timeit(args.n)
 
+        if args.memory_summary and torch.cuda.is_available():
+            print("Memory usage summary:")
+            print(torch.cuda.memory_summary())
+
         print(" -- Results --")
         print(
             f"PLEASE NOTE: these are speeds for the MODEL, evaluated on --n-data={args.n_data} configurations kept in memory."

From 17c62bbc6104e8f17bd345b5001e54110edeebb0 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Wed, 16 Nov 2022 23:24:15 -0500
Subject: [PATCH 24/50] allow `deploy` model builder argument

---
 nequip/model/_build.py      | 11 +++++++++--
 nequip/scripts/benchmark.py |  2 +-
 nequip/scripts/deploy.py    |  2 +-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/nequip/model/_build.py b/nequip/model/_build.py
index 0fe4e21d..7e1a63fd 100644
--- a/nequip/model/_build.py
+++ b/nequip/model/_build.py
@@ -8,7 +8,10 @@
 
 
 def model_from_config(
-    config, initialize: bool = False, dataset: Optional[AtomicDataset] = None
+    config,
+    initialize: bool = False,
+    dataset: Optional[AtomicDataset] = None,
+    deploy: bool = False,
 ) -> GraphModuleMixin:
     """Build a model based on `config`.
 
@@ -17,11 +20,13 @@ def model_from_config(
      - ``model``: the model produced by the previous builder. Cannot be requested by the first builder, must be requested by subsequent ones.
      - ``initialize``: whether to initialize the model
      - ``dataset``: if ``initialize`` is True, the dataset
+     - ``deploy``: whether the model object is for deployment / inference
 
     Args:
         config
-        initialize (bool): if True (default False), ``model_initializers`` will also be run.
+        initialize (bool): whether ``model_builders`` should be instructed to initialize the model
         dataset: dataset for initializers if ``initialize`` is True.
+        deploy (bool): whether ``model_builders`` should be told the model is for deployment / inference
 
     Returns:
         The build model.
@@ -61,6 +66,8 @@ def model_from_config(
         params = {}
         if "initialize" in pnames:
             params["initialize"] = initialize
+        if "deploy" in pnames:
+            params["deploy"] = deploy
         if "config" in pnames:
             params["config"] = config
         if "dataset" in pnames:
diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py
index ac3a5b94..82c61d37 100644
--- a/nequip/scripts/benchmark.py
+++ b/nequip/scripts/benchmark.py
@@ -140,7 +140,7 @@ def main(args=None):
     # Load model:
     print("Building model... ")
     model_time = time.time()
-    model = model_from_config(config, initialize=True, dataset=dataset)
+    model = model_from_config(config, initialize=True, dataset=dataset, deploy=True)
     model_time = time.time() - model_time
     print(f"    building model took {model_time:.4f}s")
     print(f"    model has {sum(p.numel() for p in model.parameters())} weights")
diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py
index 8185ab75..7aafd5fc 100644
--- a/nequip/scripts/deploy.py
+++ b/nequip/scripts/deploy.py
@@ -198,7 +198,7 @@ def main(args=None):
                 args.train_dir, model_name="best_model.pth", device="cpu"
             )
         elif args.model is not None:
-            model = model_from_config(config)
+            model = model_from_config(config, deploy=True)
         else:
             raise AssertionError
 

From b2aa837a7ba53d32d64a6ff2ff5d69014881b423 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Sun, 27 Nov 2022 23:08:55 -0500
Subject: [PATCH 25/50] fix model deploy status in test

---
 nequip/utils/unittests/model_tests.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py
index e0d807bd..2b6a8b63 100644
--- a/nequip/utils/unittests/model_tests.py
+++ b/nequip/utils/unittests/model_tests.py
@@ -44,11 +44,10 @@ def config(self):
     def device(self, request):
         return request.param
 
-    @pytest.fixture(scope="class")
-    def model(self, config, device):
-        config, out_fields = config
-        torch.manual_seed(0)
-        np.random.seed(0)
+    @staticmethod
+    def make_model(config, device, initialize: bool = True, deploy: bool = False):
+        torch.manual_seed(127)
+        np.random.seed(193)
         config = config.copy()
         config.update(
             {
@@ -56,8 +55,14 @@ def model(self, config, device):
                 "types_names": ["H", "C", "O"],
             }
         )
-        model = model_from_config(config)
+        model = model_from_config(config, initialize=initialize, deploy=deploy)
         model = model.to(device)
+        return model
+
+    @pytest.fixture(scope="class")
+    def model(self, config, device):
+        config, out_fields = config
+        model = self.make_model(config, device=device)
         return model, out_fields
 
     # == common tests for all models ==

From 225473933536522fff25f4a5e3b961b6e24df645 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 29 Nov 2022 17:32:11 -0500
Subject: [PATCH 26/50] nequip-benchmark --pdb

---
 CHANGELOG.md                |  1 +
 nequip/scripts/benchmark.py | 24 +++++++++++++++++++++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index be6b1fa6..50338467 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ Most recent change on the bottom.
 ### Added
 - `nequip-benchmark` and `nequip-train` report number of weights and number of trainable weights
 - `nequip-benchmark --no-compile` and `--verbose` and `--memory-summary`
+- `nequip-benchmark --pdb` for debugging model (builder) errors
 
 ### Fixed
 - Git utilities when installed as ZIPed `.egg` (#264)
diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py
index 82c61d37..5814a838 100644
--- a/nequip/scripts/benchmark.py
+++ b/nequip/scripts/benchmark.py
@@ -5,6 +5,7 @@
 import time
 import logging
 import sys
+import pdb
 
 import torch
 from torch.utils.benchmark import Timer, Measurement
@@ -70,9 +71,16 @@ def main(args=None):
     parser.add_argument(
         "--verbose", help="Logging verbosity level", type=str, default="error"
     )
+    parser.add_argument(
+        "--pdb",
+        help="Run model builders and model under debugger to easily drop to debugger to investigate errors.",
+        action="store_true",
+    )
 
     # Parse the args
     args = parser.parse_args(args=args)
+    if args.pdb:
+        assert args.profile is None
 
     root_logger = logging.getLogger()
     root_logger.setLevel(getattr(logging, args.verbose.upper()))
@@ -140,7 +148,13 @@ def main(args=None):
     # Load model:
     print("Building model... ")
     model_time = time.time()
-    model = model_from_config(config, initialize=True, dataset=dataset, deploy=True)
+    try:
+        model = model_from_config(config, initialize=True, dataset=dataset, deploy=True)
+    except:  # noqa: E722
+        if args.pdb:
+            pdb.post_mortem()
+        else:
+            raise
     model_time = time.time() - model_time
     print(f"    building model took {model_time:.4f}s")
     print(f"    model has {sum(p.numel() for p in model.parameters())} weights")
@@ -196,6 +210,14 @@ def trace_handler(p):
             for _ in range(1 + warmup + args.n):
                 model(next(datas).copy())
                 p.step()
+    elif args.pdb:
+        print("Running model under debugger...")
+        try:
+            for _ in range(args.n):
+                model(next(datas).copy())
+        except:  # noqa: E722)
+            pdb.post_mortem()
+        print("Done.")
     else:
         print("Warmup...")
         warmup_time = time.time()

From b6331c013c11b0e3fa697808e2cdd8e457f5b1b4 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Fri, 2 Dec 2022 14:49:38 -0500
Subject: [PATCH 27/50] fix device bug

---
 nequip/data/transforms.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/nequip/data/transforms.py b/nequip/data/transforms.py
index 5a37e2be..4f6331b7 100644
--- a/nequip/data/transforms.py
+++ b/nequip/data/transforms.py
@@ -121,9 +121,9 @@ def transform(self, atomic_numbers):
                 f"Data included atomic numbers {bad_set} that are not part of the atomic number -> type mapping!"
             )
 
-        return self._Z_to_index[atomic_numbers - self._min_Z].to(
-            device=atomic_numbers.device
-        )
+        return self._Z_to_index.to(device=atomic_numbers.device)[
+            atomic_numbers - self._min_Z
+        ]
 
     def untransform(self, atom_types):
         """Transform atom types back into atomic numbers"""

From 1fb33a29d4d128a304a365351a762fe92c2dff2a Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Fri, 2 Dec 2022 16:19:06 -0500
Subject: [PATCH 28/50] remove broken test

---
 tests/unit/trainer/test_trainer.py | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/tests/unit/trainer/test_trainer.py b/tests/unit/trainer/test_trainer.py
index c8169fda..860be357 100644
--- a/tests/unit/trainer/test_trainer.py
+++ b/tests/unit/trainer/test_trainer.py
@@ -106,26 +106,6 @@ def test_save(self, trainer, format, suffix):
             assert isfile(file_name), "fail to save to file"
             assert suffix in file_name
 
-    @pytest.mark.parametrize("append", [True])  # , False])
-    def test_from_dict(self, trainer, append):
-
-        # torch.save(trainer.model, trainer.best_model_path)
-
-        dictionary = trainer.as_dict(state_dict=True, training_progress=True)
-        trainer1 = Trainer.from_dict(dictionary, append=append)
-
-        for key in [
-            "best_model_path",
-            "last_model_path",
-            "logfile",
-            "epoch_log",
-            "batch_log",
-            "workdir",
-        ]:
-            v1 = getattr(trainer, key, None)
-            v2 = getattr(trainer1, key, None)
-            assert append == (v1 == v2)
-
     @pytest.mark.parametrize("append", [True])  # , False])
     def test_from_file(self, trainer, append):
 

From 288706abecdf1e860ba391a66e0531b50820ec86 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 6 Dec 2022 21:50:13 -0500
Subject: [PATCH 29/50]  fix test

---
 nequip/utils/unittests/model_tests.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py
index 2b6a8b63..f5f7700b 100644
--- a/nequip/utils/unittests/model_tests.py
+++ b/nequip/utils/unittests/model_tests.py
@@ -74,7 +74,9 @@ def test_jit(self, model, atomic_batch, device):
         instance, out_fields = model
         data = AtomicData.to_AtomicDataDict(atomic_batch.to(device=device))
         instance = instance.to(device=device)
-        model_script = script(instance)
+        model_script = script(
+            instance, in_place=False
+        )  # don't modify the model used by other tests later
 
         for out_field in out_fields:
             assert torch.allclose(

From 0f3909161d387e4a35b26969694b1aaf3c298e2a Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Wed, 7 Dec 2022 09:52:58 -0500
Subject: [PATCH 30/50] Revert " fix test"

This reverts commit 288706abecdf1e860ba391a66e0531b50820ec86.
---
 nequip/utils/unittests/model_tests.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/nequip/utils/unittests/model_tests.py b/nequip/utils/unittests/model_tests.py
index f5f7700b..2b6a8b63 100644
--- a/nequip/utils/unittests/model_tests.py
+++ b/nequip/utils/unittests/model_tests.py
@@ -74,9 +74,7 @@ def test_jit(self, model, atomic_batch, device):
         instance, out_fields = model
         data = AtomicData.to_AtomicDataDict(atomic_batch.to(device=device))
         instance = instance.to(device=device)
-        model_script = script(
-            instance, in_place=False
-        )  # don't modify the model used by other tests later
+        model_script = script(instance)
 
         for out_field in out_fields:
             assert torch.allclose(

From 64df5e52e269fadc1480ae78c4d1682b59bb8486 Mon Sep 17 00:00:00 2001
From: Lixin Sun <nw13mifaso@gmail.com>
Date: Wed, 7 Dec 2022 16:31:54 +0000
Subject: [PATCH 31/50] remove sklearn dependence (#248)

* change solver

* remove dependency on dataset

* add ridge tests

* swap to ridge

* add down sampling

* change to coef

* change to torch.solve

* black

* fix correlated columns

* fix sqrt error

* black

* black

* black

* add normalization

* black

* flake8

* change explanation

* add debug log

* Update tests/conftest.py

Co-authored-by: Alby M. <1473644+Linux-cpp-lisp@users.noreply.github.com>

* Update tests/conftest.py

Co-authored-by: Alby M. <1473644+Linux-cpp-lisp@users.noreply.github.com>

* Fix typo in README (#270)

* update change log

* Update tests/conftest.py

Co-authored-by: Alby M. <1473644+Linux-cpp-lisp@users.noreply.github.com>

* Update tests/conftest.py

Co-authored-by: Alby M. <1473644+Linux-cpp-lisp@users.noreply.github.com>

Co-authored-by: Lixin Sun <lixinsun@microsoft.com>
Co-authored-by: Alby M. <1473644+Linux-cpp-lisp@users.noreply.github.com>
Co-authored-by: Simon Batzner <simonbatzner@gmail.com>
---
 CHANGELOG.md                       |   1 +
 README.md                          |   2 +-
 configs/full.yaml                  |   4 +-
 nequip/utils/regressor.py          | 235 ++++++++---------------------
 nequip/utils/unittests/conftest.py |  17 +++
 setup.py                           |   1 -
 tests/unit/data/test_dataset.py    |  32 ++--
 tests/unit/utils/test_gp.py        |  37 -----
 tests/unit/utils/test_solver.py    |  38 +++++
 9 files changed, 132 insertions(+), 235 deletions(-)
 delete mode 100644 tests/unit/utils/test_gp.py
 create mode 100644 tests/unit/utils/test_solver.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 50338467..67bd0c81 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ Most recent change on the bottom.
 
 ## [Unreleased] - 0.5.6
 ### Added
+- sklearn dependency removed
 - `nequip-benchmark` and `nequip-train` report number of weights and number of trainable weights
 - `nequip-benchmark --no-compile` and `--verbose` and `--memory-summary`
 - `nequip-benchmark --pdb` for debugging model (builder) errors
diff --git a/README.md b/README.md
index f70840b8..da741c09 100644
--- a/README.md
+++ b/README.md
@@ -157,6 +157,6 @@ under the guidance of [Boris Kozinsky at Harvard](https://bkoz.seas.harvard.edu/
 If you have questions, please don't hesitate to reach out at batzner[at]g[dot]harvard[dot]edu. 
 
 If you find a bug or have a proposal for a feature, please post it in the [Issues](https://github.com/mir-group/nequip/issues).
-If you have a question, topic, or issue that isn't obviously one of those, try our [GitHub Disucssions](https://github.com/mir-group/nequip/discussions).
+If you have a question, topic, or issue that isn't obviously one of those, try our [GitHub Discussions](https://github.com/mir-group/nequip/discussions).
 
 If you want to contribute to the code, please read [`CONTRIBUTING.md`](CONTRIBUTING.md).
diff --git a/configs/full.yaml b/configs/full.yaml
index 3c3e8984..d310c70a 100644
--- a/configs/full.yaml
+++ b/configs/full.yaml
@@ -315,10 +315,10 @@ per_species_rescale_scales: dataset_forces_rms
 # If not provided, defaults to dataset_per_species_force_rms or dataset_per_atom_total_energy_std, depending on whether forces are being trained.
 # per_species_rescale_kwargs: 
 #   total_energy: 
-#     alpha: 0.1
+#     alpha: 0.001
 #     max_iteration: 20
 #     stride: 100
-# keywords for GP decomposition of per specie energy. Optional. Defaults to 0.1
+# keywords for ridge regression decomposition of per specie energy. Optional. Defaults to 0.001. The value should be in the range of 1e-3 to 1e-2
 # per_species_rescale_arguments_in_dataset_units: True
 # if explicit numbers are given for the shifts/scales, this parameter must specify whether the given numbers are unitless shifts/scales or are in the units of the dataset. If ``True``, any global rescalings will correctly be applied to the per-species values.
 
diff --git a/nequip/utils/regressor.py b/nequip/utils/regressor.py
index 3d23cf84..30c8f9ab 100644
--- a/nequip/utils/regressor.py
+++ b/nequip/utils/regressor.py
@@ -1,181 +1,72 @@
 import logging
 import torch
-import numpy as np
-from typing import Optional
-from sklearn.gaussian_process import GaussianProcessRegressor
-from sklearn.gaussian_process.kernels import DotProduct, Kernel, Hyperparameter
 
+from torch import matmul
+from torch.linalg import solve, inv
+from typing import Optional, Sequence
+from opt_einsum import contract
 
-def solver(X, y, regressor: Optional[str] = "NormalizedGaussianProcess", **kwargs):
-    if regressor == "GaussianProcess":
-        return gp(X, y, **kwargs)
-    elif regressor == "NormalizedGaussianProcess":
-        return normalized_gp(X, y, **kwargs)
-    else:
-        raise NotImplementedError(f"{regressor} is not implemented")
 
+def solver(X, y, alpha: Optional[float] = 0.001, stride: Optional[int] = 1, **kwargs):
+
+    dtype = torch.get_default_dtype()
+    X = X[::stride].to(dtype)
+    y = y[::stride].to(dtype)
+
+    X, y = down_sampling_by_composition(X, y)
+
+    X_norm = torch.sum(X)
+
+    X = X / X_norm
+    y = y / X_norm
 
-def normalized_gp(X, y, **kwargs):
-    feature_rms = 1.0 / np.sqrt(np.average(X**2, axis=0))
-    feature_rms = np.nan_to_num(feature_rms, 1)
     y_mean = torch.sum(y) / torch.sum(X)
-    mean, std = base_gp(
-        X,
-        y - (torch.sum(X, axis=1) * y_mean).reshape(y.shape),
-        NormalizedDotProduct,
-        {"diagonal_elements": feature_rms},
-        **kwargs,
-    )
-    return mean + y_mean, std
-
-
-def gp(X, y, **kwargs):
-    return base_gp(
-        X, y, DotProduct, {"sigma_0": 0, "sigma_0_bounds": "fixed"}, **kwargs
-    )
-
-
-def base_gp(
-    X,
-    y,
-    kernel,
-    kernel_kwargs,
-    alpha: Optional[float] = 0.1,
-    max_iteration: int = 20,
-    stride: Optional[int] = None,
+
+    feature_rms = torch.sqrt(torch.mean(X**2, axis=0))
+
+    alpha_mat = torch.diag(feature_rms) * alpha * alpha
+
+    A = matmul(X.T, X) + alpha_mat
+    dy = y - (torch.sum(X, axis=1, keepdim=True) * y_mean).reshape(y.shape)
+    Xy = matmul(X.T, dy)
+
+    mean = solve(A, Xy)
+
+    sigma2 = torch.var(matmul(X, mean) - dy)
+    Ainv = inv(A)
+    cov = torch.sqrt(sigma2 * contract("ij,kj,kl,li->i", Ainv, X, X, Ainv))
+
+    mean = mean + y_mean.reshape([-1])
+
+    logging.debug(f"Ridge Regression, residue {sigma2}")
+
+    return mean, cov
+
+
+def down_sampling_by_composition(
+    X: torch.Tensor, y: torch.Tensor, percentage: Sequence = [0.25, 0.5, 0.75]
 ):
 
-    if len(y.shape) == 1:
-        y = y.reshape([-1, 1])
-
-    if stride is not None:
-        X = X[::stride]
-        y = y[::stride]
-
-    not_fit = True
-    iteration = 0
-    mean = None
-    std = None
-    while not_fit:
-        logging.debug(f"GP fitting iteration {iteration} {alpha}")
-        try:
-            _kernel = kernel(**kernel_kwargs)
-            gpr = GaussianProcessRegressor(kernel=_kernel, random_state=0, alpha=alpha)
-            gpr = gpr.fit(X, y)
-
-            vec = torch.diag(torch.ones(X.shape[1]))
-            mean, std = gpr.predict(vec, return_std=True)
-
-            mean = torch.as_tensor(mean, dtype=torch.get_default_dtype()).reshape([-1])
-            # ignore all the off-diagonal terms
-            std = torch.as_tensor(std, dtype=torch.get_default_dtype()).reshape([-1])
-            likelihood = gpr.log_marginal_likelihood()
-
-            res = torch.sqrt(
-                torch.square(torch.matmul(X, mean.reshape([-1, 1])) - y).mean()
-            )
-
-            logging.debug(
-                f"GP fitting: alpha {alpha}:\n"
-                f"            residue {res}\n"
-                f"            mean {mean} std {std}\n"
-                f"            log marginal likelihood {likelihood}"
-            )
-            not_fit = False
-
-        except Exception as e:
-            logging.info(f"GP fitting failed for alpha={alpha} and {e.args}")
-            if alpha == 0 or alpha is None:
-                logging.info("try a non-zero alpha")
-                not_fit = False
-                raise ValueError(
-                    f"Please set the {alpha} to non-zero value. \n"
-                    "The dataset energy is rank deficient to be solved with GP"
-                )
-            else:
-                alpha = alpha * 2
-                iteration += 1
-                logging.debug(f"           increase alpha to {alpha}")
-
-            if iteration >= max_iteration or not_fit is False:
-                raise ValueError(
-                    "Please set the per species shift and scale to zeros and ones. \n"
-                    "The dataset energy is to diverge to be solved with GP"
-                )
-
-    return mean, std
-
-
-class NormalizedDotProduct(Kernel):
-    r"""Dot-Product kernel.
-    .. math::
-        k(x_i, x_j) = x_i \cdot A \cdot x_j
-    """
-
-    def __init__(self, diagonal_elements):
-        # TO DO: check shape
-        self.diagonal_elements = diagonal_elements
-        self.A = np.diag(diagonal_elements)
-
-    def __call__(self, X, Y=None, eval_gradient=False):
-        """Return the kernel k(X, Y) and optionally its gradient.
-        Parameters
-        ----------
-        X : ndarray of shape (n_samples_X, n_features)
-            Left argument of the returned kernel k(X, Y)
-        Y : ndarray of shape (n_samples_Y, n_features), default=None
-            Right argument of the returned kernel k(X, Y). If None, k(X, X)
-            if evaluated instead.
-        eval_gradient : bool, default=False
-            Determines whether the gradient with respect to the log of
-            the kernel hyperparameter is computed.
-            Only supported when Y is None.
-        Returns
-        -------
-        K : ndarray of shape (n_samples_X, n_samples_Y)
-            Kernel k(X, Y)
-        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
-                optional
-            The gradient of the kernel k(X, X) with respect to the log of the
-            hyperparameter of the kernel. Only returned when `eval_gradient`
-            is True.
-        """
-        X = np.atleast_2d(X)
-        if Y is None:
-            K = (X.dot(self.A)).dot(X.T)
-        else:
-            if eval_gradient:
-                raise ValueError("Gradient can only be evaluated when Y is None.")
-            K = (X.dot(self.A)).dot(Y.T)
-
-        if eval_gradient:
-            return K, np.empty((X.shape[0], X.shape[0], 0))
-        else:
-            return K
-
-    def diag(self, X):
-        """Returns the diagonal of the kernel k(X, X).
-        The result of this method is identical to np.diag(self(X)); however,
-        it can be evaluated more efficiently since only the diagonal is
-        evaluated.
-        Parameters
-        ----------
-        X : ndarray of shape (n_samples_X, n_features)
-            Left argument of the returned kernel k(X, Y).
-        Returns
-        -------
-        K_diag : ndarray of shape (n_samples_X,)
-            Diagonal of kernel k(X, X).
-        """
-        return np.einsum("ij,ij,jj->i", X, X, self.A)
-
-    def __repr__(self):
-        return ""
-
-    def is_stationary(self):
-        """Returns whether the kernel is stationary."""
-        return False
-
-    @property
-    def hyperparameter_diagonal_elements(self):
-        return Hyperparameter("diagonal_elements", "numeric", "fixed")
+    unique_comps, comp_ids = torch.unique(X, dim=0, return_inverse=True)
+
+    n_types = torch.max(comp_ids) + 1
+
+    sort_by = torch.argsort(comp_ids)
+
+    # find out the block for each composition
+    d_icomp = comp_ids[sort_by]
+    d_icomp = d_icomp[:-1] - d_icomp[1:]
+    node_icomp = torch.where(d_icomp != 0)[0]
+    id_start = torch.cat((torch.as_tensor([0]), node_icomp + 1))
+    id_end = torch.cat((node_icomp + 1, torch.as_tensor([len(sort_by)])))
+
+    n_points = len(percentage)
+    new_X = torch.zeros((n_types * n_points, X.shape[1]))
+    new_y = torch.zeros((n_types * n_points))
+    for i in range(n_types):
+        ids = sort_by[id_start[i] : id_end[i]]
+        for j, p in enumerate(percentage):
+            new_y[i * n_points + j] = torch.quantile(y[ids], p, interpolation="linear")
+            new_X[i * n_points + j] = unique_comps[i]
+
+    return new_X, new_y
diff --git a/nequip/utils/unittests/conftest.py b/nequip/utils/unittests/conftest.py
index 060e5e7b..77a91930 100644
--- a/nequip/utils/unittests/conftest.py
+++ b/nequip/utils/unittests/conftest.py
@@ -133,5 +133,22 @@ def atomic_batch(nequip_dataset):
     return Batch.from_data_list([nequip_dataset[0], nequip_dataset[1]])
 
 
+@pytest.fixture(scope="function")
+def per_species_set():
+    dtype = torch.get_default_dtype()
+    torch.manual_seed(0)
+    mean_min = 1
+    mean_max = 100
+    std = 20
+    n_sample = 1000
+    n_species = 9
+    ref_mean = torch.rand((n_species)) * (mean_max - mean_min) + mean_min
+    t_mean = torch.ones((n_sample, 1)) * ref_mean.reshape([1, -1])
+    ref_std = torch.rand((n_species)) * std
+    t_std = torch.ones((n_sample, 1)) * ref_std.reshape([1, -1])
+    E = torch.normal(t_mean, t_std)
+    return ref_mean.to(dtype), ref_std.to(dtype), E.to(dtype), n_sample, n_species
+
+
 # Use debug mode
 set_irreps_debug(True)
diff --git a/setup.py b/setup.py
index 8c977e0a..cba6b51f 100644
--- a/setup.py
+++ b/setup.py
@@ -37,7 +37,6 @@
         "typing_extensions;python_version<'3.8'",  # backport of Final
         "torch-runstats>=0.2.0",
         "torch-ema>=0.3.0",
-        "scikit_learn<=1.0.1",  # for GaussianProcess for per-species statistics; 1.0.2 has a bug!
     ],
     zip_safe=True,
 )
diff --git a/tests/unit/data/test_dataset.py b/tests/unit/data/test_dataset.py
index f45e0ca8..bad796c3 100644
--- a/tests/unit/data/test_dataset.py
+++ b/tests/unit/data/test_dataset.py
@@ -31,7 +31,7 @@ def ase_file(molecules):
 
 
 MAX_ATOMIC_NUMBER: int = 5
-NATOMS = 3
+NATOMS = 10
 
 
 @pytest.fixture(scope="function")
@@ -277,16 +277,11 @@ def test_per_node_field(self, npz_dataset, fixed_field, mode, subset):
         )
         print(result)
 
-    @pytest.mark.parametrize("alpha", [1e-5, 1e-3, 0.1, 0.5])
+    @pytest.mark.parametrize("alpha", [0, 1e-3, 0.01])
     @pytest.mark.parametrize("fixed_field", [True, False])
     @pytest.mark.parametrize("full_rank", [True, False])
     @pytest.mark.parametrize("subset", [True, False])
-    @pytest.mark.parametrize(
-        "regressor", ["NormalizedGaussianProcess", "GaussianProcess"]
-    )
-    def test_per_graph_field(
-        self, npz_dataset, alpha, fixed_field, full_rank, regressor, subset
-    ):
+    def test_per_graph_field(self, npz_dataset, alpha, fixed_field, full_rank, subset):
 
         if alpha <= 1e-4 and not full_rank:
             return
@@ -308,10 +303,7 @@ def test_per_graph_field(
         del n_spec
         del Ns
 
-        if alpha == 1e-5:
-            ref_mean, ref_std, E = generate_E(N, 100, 1000, 0.0)
-        else:
-            ref_mean, ref_std, E = generate_E(N, 100, 1000, 0.5)
+        ref_mean, ref_std, E = generate_E(N, 100, 1000, 10)
 
         if subset:
             E_orig_order = torch.zeros_like(
@@ -333,7 +325,6 @@ def test_per_graph_field(
                 AtomicDataDict.TOTAL_ENERGY_KEY
                 + "per_species_mean_std": {
                     "alpha": alpha,
-                    "regressor": regressor,
                     "stride": 1,
                 }
             },
@@ -341,21 +332,18 @@ def test_per_graph_field(
 
         res = torch.matmul(N, mean.reshape([-1, 1])) - E.reshape([-1, 1])
         res2 = torch.sum(torch.square(res))
-        print("residue", alpha, res2 - ref_res2)
+        print("alpha, residue, actual residue", alpha, res2, ref_res2)
         print("mean", mean, ref_mean)
         print("diff in mean", mean - ref_mean)
         print("std", std, ref_std)
 
+        tolerance = torch.max(ref_std) * 4
         if full_rank:
-            if alpha == 1e-5:
-                assert torch.allclose(mean, ref_mean, rtol=1e-1)
-            else:
-                assert torch.allclose(mean, ref_mean, rtol=1)
-                assert torch.allclose(std, torch.zeros_like(ref_mean), atol=alpha * 100)
-        elif regressor == "NormalizedGaussianProcess":
-            assert torch.std(mean).numpy() == 0
+            assert torch.allclose(mean, ref_mean, atol=tolerance)
+            # assert torch.allclose(std, torch.zeros_like(ref_mean), atol=alpha * 100)
         else:
-            assert mean[0] == mean[1] * 2
+            assert torch.allclose(mean, mean[0], atol=tolerance)
+            # assert torch.std(mean).numpy() == 0
 
 
 class TestReload:
diff --git a/tests/unit/utils/test_gp.py b/tests/unit/utils/test_gp.py
deleted file mode 100644
index 4792b9d2..00000000
--- a/tests/unit/utils/test_gp.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import torch
-import pytest
-
-from nequip.utils.regressor import base_gp
-from sklearn.gaussian_process.kernels import DotProduct
-
-
-# @pytest.mark.parametrize("full_rank", [True, False])
-@pytest.mark.parametrize("full_rank", [False])
-@pytest.mark.parametrize("alpha", [0, 1e-3, 0.1, 1])
-def test_random(full_rank, alpha):
-
-    if alpha == 0 and not full_rank:
-        return
-
-    torch.manual_seed(0)
-    n_samples = 10
-    n_dim = 3
-
-    if full_rank:
-        X = torch.randint(low=1, high=10, size=(n_samples, n_dim))
-    else:
-        X = torch.randint(low=1, high=10, size=(n_samples, 1)) * torch.ones(
-            (n_samples, n_dim)
-        )
-
-    ref_mean = torch.rand((n_dim, 1))
-    y = torch.matmul(X, ref_mean)
-
-    mean, std = base_gp(
-        X, y, DotProduct, {"sigma_0": 0, "sigma_0_bounds": "fixed"}, alpha=0.1
-    )
-
-    if full_rank:
-        assert torch.allclose(ref_mean, mean, rtol=0.5)
-    else:
-        assert torch.allclose(mean, mean[0], rtol=1e-3)
diff --git a/tests/unit/utils/test_solver.py b/tests/unit/utils/test_solver.py
new file mode 100644
index 00000000..049c897d
--- /dev/null
+++ b/tests/unit/utils/test_solver.py
@@ -0,0 +1,38 @@
+import torch
+import pytest
+
+from nequip.utils.regressor import solver
+
+
+@pytest.mark.parametrize("full_rank", [True, False])
+@pytest.mark.parametrize("alpha", [0, 1e-3, 1e-2])
+def test_random(full_rank, alpha, per_species_set):
+
+    if alpha == 0 and not full_rank:
+        return
+
+    torch.manual_seed(0)
+
+    ref_mean, ref_std, E, n_samples, n_dim = per_species_set
+
+    dtype = torch.get_default_dtype()
+
+    X = torch.randint(low=1, high=10, size=(n_samples, n_dim)).to(dtype)
+    if not full_rank:
+        X[:, n_dim - 2] = X[:, n_dim - 1] * 2
+    y = (X * E).sum(axis=-1)
+
+    mean, std = solver(X, y, alpha=alpha)
+
+    tolerance = torch.max(ref_std)
+
+    print("tolerance", tolerance)
+    print("solution", mean, std)
+    print("diff", mean - ref_mean)
+
+    if full_rank:
+        assert torch.allclose(ref_mean, mean, atol=tolerance)
+    else:
+        assert torch.allclose(mean[n_dim - 1], mean[n_dim - 2], atol=tolerance)
+
+    assert torch.max(std) < tolerance

From 6ae6710aa7ae145f2d0b6917671f7a53c23975ec Mon Sep 17 00:00:00 2001
From: Lixin Sun <nw13mifaso@gmail.com>
Date: Wed, 7 Dec 2022 16:32:15 +0000
Subject: [PATCH 32/50] Better documentation and guidance (#102)

* add rst for how to setup a dataset

* Update dataset.rst

* upadte command line instruction

* update link

* update dataset keys in all_options

* RST cleanup

* command line reference

* update the per_species doc

* lint

* test

* another test

* update all colders

* fix misspell

* add trainer to api

* add install instructions to docs

* simpler install

Co-authored-by: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Co-authored-by: Lixin Sun <lixinsun@microsoft.com>
---
 configs/full.yaml                         |   9 +-
 docs/api/nequip.rst                       |   3 +-
 docs/api/trainer.rst                      |  10 ++
 docs/cite.rst                             |   3 +
 docs/commandline/commands.rst             | 132 ++++++++++++++++++
 docs/{guide/FAQ.rst => errors/errors.rst} |  13 +-
 docs/faq/FAQ.rst                          |  14 ++
 docs/guide/guide.rst                      |   9 --
 docs/guide/intro.rst                      |   4 -
 docs/guide/irreps.rst                     |   9 --
 docs/{guide => howto}/conventions.rst     |   0
 docs/howto/dataset.rst                    | 156 ++++++++++++++++++++++
 docs/howto/howto.rst                      |   7 +
 docs/{guide => howto}/migrate.rst         |   0
 docs/index.rst                            |  12 +-
 docs/installation/install.rst             |  39 ++++++
 docs/introduction/intro.rst               |   4 +
 docs/lammps/all.rst                       |   7 +
 docs/lammps/ase.rst                       |   2 +
 docs/lammps/lammps.rst                    |   2 +
 docs/options/dataset.rst                  |  10 +-
 docs/yaml/yaml.rst                        |   4 +
 nequip/data/dataset.py                    |   2 +-
 nequip/scripts/deploy.py                  |   2 +-
 nequip/scripts/evaluate.py                |  10 +-
 nequip/scripts/train.py                   |   8 +-
 26 files changed, 421 insertions(+), 50 deletions(-)
 create mode 100644 docs/api/trainer.rst
 create mode 100644 docs/cite.rst
 create mode 100644 docs/commandline/commands.rst
 rename docs/{guide/FAQ.rst => errors/errors.rst} (56%)
 create mode 100644 docs/faq/FAQ.rst
 delete mode 100644 docs/guide/guide.rst
 delete mode 100644 docs/guide/intro.rst
 delete mode 100644 docs/guide/irreps.rst
 rename docs/{guide => howto}/conventions.rst (100%)
 create mode 100644 docs/howto/dataset.rst
 create mode 100644 docs/howto/howto.rst
 rename docs/{guide => howto}/migrate.rst (100%)
 create mode 100644 docs/installation/install.rst
 create mode 100644 docs/introduction/intro.rst
 create mode 100644 docs/lammps/all.rst
 create mode 100644 docs/lammps/ase.rst
 create mode 100644 docs/lammps/lammps.rst
 create mode 100644 docs/yaml/yaml.rst

diff --git a/configs/full.yaml b/configs/full.yaml
index d310c70a..2f98164e 100644
--- a/configs/full.yaml
+++ b/configs/full.yaml
@@ -211,6 +211,8 @@ loss_coeffs:
   total_energy:                                                                    
     - 1
     - PerAtomMSELoss
+# Note that the ratio between the force and energy loss weights matters for the training process. One may consider using 1:1 with the PerAtomMSELoss. If the energy loss still significantly dominates the loss function in the initial epochs, lowering the energy loss weight helps the training a lot.
+
 
 # # default loss function is MSELoss, the name has to be exactly the same as those in torch.nn.
 # the only supprted targets are forces and total_energy
@@ -342,9 +344,10 @@ global_rescale_scale_trainable: false
 # global_rescale_shift_trainable: false
 # global_rescale_scale: dataset_forces_rms
 # global_rescale_scale_trainable: false
-# per_species_rescale_trainable: true
-# per_species_rescale_shifts: dataset_per_atom_total_energy_mean
-# per_species_rescale_scales: dataset_per_atom_total_energy_std
+# per_species_rescale_shifts_trainable: false
+# per_species_rescale_scales_trainable: true
+# per_species_rescale_shifts: dataset_per_species_total_energy_mean
+# per_species_rescale_scales: dataset_per_species_forces_rms
 
 # # full block needed for global rescale
 # global_rescale_shift: dataset_total_energy_mean
diff --git a/docs/api/nequip.rst b/docs/api/nequip.rst
index 13bc37ca..6f6250cf 100644
--- a/docs/api/nequip.rst
+++ b/docs/api/nequip.rst
@@ -3,4 +3,5 @@ Python API
 
  .. toctree::
 
-    data
\ No newline at end of file
+    data
+    trainer
diff --git a/docs/api/trainer.rst b/docs/api/trainer.rst
new file mode 100644
index 00000000..983e6f6b
--- /dev/null
+++ b/docs/api/trainer.rst
@@ -0,0 +1,10 @@
+nequip.trainer
+==============
+ 
+ .. automodule:: nequip.train.trainer
+    :members:
+    :imported-members:
+
+ .. automodule:: nequip.train.trainer_wandb
+    :members:
+    :imported-members:
diff --git a/docs/cite.rst b/docs/cite.rst
new file mode 100644
index 00000000..9f8296cc
--- /dev/null
+++ b/docs/cite.rst
@@ -0,0 +1,3 @@
+Citing Nequip
+=============
+
diff --git a/docs/commandline/commands.rst b/docs/commandline/commands.rst
new file mode 100644
index 00000000..b58c87ab
--- /dev/null
+++ b/docs/commandline/commands.rst
@@ -0,0 +1,132 @@
+Command-line Executables
+========================
+
+``nequip-train``
+----------------
+
+ .. code ::
+
+    usage: nequip-train [-h] [--equivariance-test] [--model-debug-mode] [--grad-anomaly-mode] [--log LOG] config
+
+Train (or restart training of) a NequIP model.
+
+positional arguments:
+  config               YAML file configuring the model, dataset, and other options
+
+optional arguments:
+  -h, --help           show this help message and exit
+  --equivariance-test  test the model's equivariance before training
+  --model-debug-mode   enable model debug mode, which can sometimes give much more useful error messages at the
+                       cost of some speed. Do not use for production training!
+  --grad-anomaly-mode  enable PyTorch autograd anomaly mode to debug NaN gradients. Do not use for production
+                       training!
+  --log LOG            log file to store all the screen logging
+
+``nequip-evaluate``
+-------------------
+
+ .. code ::
+
+    usage: nequip-evaluate [-h] [--train-dir TRAIN_DIR] [--model MODEL] [--dataset-config DATASET_CONFIG]
+                        [--metrics-config METRICS_CONFIG] [--test-indexes TEST_INDEXES] [--batch-size BATCH_SIZE]
+                        [--device DEVICE] [--output OUTPUT] [--log LOG]
+
+Compute the error of a model on a test set using various metrics. The model, metrics, dataset, etc. can be specified
+in individual YAML config files, or a training session can be indicated with ``--train-dir``. In order of priority,
+the global settings (dtype, TensorFloat32, etc.) are taken from: (1) the model config (for a training session), (2)
+the dataset config (for a deployed model), or (3) the defaults. Prints only the final result in ``name = num`` format
+to stdout; all other information is logged with ``logging.debug`` to stderr. WARNING: Please note that results of CUDA models
+are rarely exactly reproducible, and that even CPU models can be nondeterministic.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --train-dir TRAIN_DIR
+                        Path to a working directory from a training session.
+  --model MODEL         A deployed or pickled NequIP model to load. If omitted, defaults to `best_model.pth` in
+                        `train_dir`.
+  --dataset-config DATASET_CONFIG
+                        A YAML config file specifying the dataset to load test data from. If omitted, `config.yaml`
+                        in `train_dir` will be used
+  --metrics-config METRICS_CONFIG
+                        A YAML config file specifying the metrics to compute. If omitted, `config.yaml` in
+                        `train_dir` will be used. If the config does not specify `metrics_components`, the default
+                        is to logging.debug MAEs and RMSEs for all fields given in the loss function. If the
+                        literal string `None`, no metrics will be computed.
+  --test-indexes TEST_INDEXES
+                        Path to a file containing the indexes in the dataset that make up the test set. If omitted,
+                        all data frames *not* used as training or validation data in the training session
+                        `train_dir` will be used.
+  --batch-size BATCH_SIZE
+                        Batch size to use. Larger is usually faster on GPU.
+  --device DEVICE       Device to run the model on. If not provided, defaults to CUDA if available and CPU
+                        otherwise.
+  --output OUTPUT       XYZ file to write out the test set and model predicted forces, energies, etc. to.
+  --log LOG             log file to store all the metrics and screen logging.debug
+
+``nequip-deploy``
+-----------------
+
+ .. code ::
+
+    usage: nequip-deploy [-h] {info,build} ...
+
+Deploy and view information about previously deployed NequIP models.
+
+optional arguments:
+  -h, --help    show this help message and exit
+
+commands:
+  {info,build}
+    info        Get information from a deployed model file
+    build       Build a deployment model
+
+``nequip-deploy info``
+~~~~~~~~~~~~~~~~~~~~~~
+
+ .. code ::
+
+    usage: nequip-deploy info [-h] model_path
+
+positional arguments:
+  model_path  Path to a deployed model file.
+
+optional arguments:
+  -h, --help  show this help message and exit
+
+
+``nequip-deploy build``
+~~~~~~~~~~~~~~~~~~~~~~~
+
+ .. code ::
+
+    usage: nequip-deploy build [-h] train_dir out_file
+
+positional arguments:
+  train_dir   Path to a working directory from a training session.
+  out_file    Output file for deployed model.
+
+optional arguments:
+  -h, --help  show this help message and exit
+
+
+``nequip-benchmark``
+--------------------
+
+ .. code ::
+
+    usage: nequip-benchmark [-h] [--profile PROFILE] [--device DEVICE] [-n N] [--n-data N_DATA] [--timestep TIMESTEP]
+                            config
+
+Benchmark the approximate MD performance of a given model configuration / dataset pair.
+
+positional arguments:
+  config               configuration file
+
+optional arguments:
+  -h, --help           show this help message and exit
+  --profile PROFILE    Profile instead of timing, creating and outputting a Chrome trace JSON to the given path.
+  --device DEVICE      Device to run the model on. If not provided, defaults to CUDA if available and CPU
+                       otherwise.
+  -n N                 Number of trials.
+  --n-data N_DATA      Number of frames to use.
+  --timestep TIMESTEP  MD timestep for ns/day estimation, in fs. Defaults to 1fs.
diff --git a/docs/guide/FAQ.rst b/docs/errors/errors.rst
similarity index 56%
rename from docs/guide/FAQ.rst
rename to docs/errors/errors.rst
index 92ac758e..576e553d 100644
--- a/docs/guide/FAQ.rst
+++ b/docs/errors/errors.rst
@@ -1,14 +1,5 @@
-FAQ
-===
-
-How do I...
------------
-
-... continue to train a model that reached a stopping condition?
-    There will be an answer here.
-
-1. Reload the model trained with version 0.3.3 to the code in 0.4.
-   check out the migration note at :ref:`migration_note`.
+Errors
+======
 
 Common errors
 -------------
diff --git a/docs/faq/FAQ.rst b/docs/faq/FAQ.rst
new file mode 100644
index 00000000..411e77c1
--- /dev/null
+++ b/docs/faq/FAQ.rst
@@ -0,0 +1,14 @@
+FAQ
+===
+
+How do I...
+-----------
+
+... continue to train a model that reached a stopping condition?
+    There will be an answer here.
+
+1. Reload the model trained with version 0.3.3 to the code in 0.4.
+   check out the migration note at :ref:`migration_note`.
+
+2. Specify my dataset for `nequip-train` and `nequip-evaluate`, see :ref:`dataset_note`.
+
diff --git a/docs/guide/guide.rst b/docs/guide/guide.rst
deleted file mode 100644
index 6def3859..00000000
--- a/docs/guide/guide.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-NequIP User Guide
-=================
-
- .. toctree::
-
-    intro
-    irreps
-    conventions
-    FAQ
\ No newline at end of file
diff --git a/docs/guide/intro.rst b/docs/guide/intro.rst
deleted file mode 100644
index 7afa4132..00000000
--- a/docs/guide/intro.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-Tutorial: Introduction to NequIP
-================================
-
-TODO
\ No newline at end of file
diff --git a/docs/guide/irreps.rst b/docs/guide/irreps.rst
deleted file mode 100644
index 5f9b2735..00000000
--- a/docs/guide/irreps.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-Irreps
-======
-
-.. _Irreps:
-
-Syntax to specify irreps
-------------------------
-
-TODO: descripe irreps syntax here
\ No newline at end of file
diff --git a/docs/guide/conventions.rst b/docs/howto/conventions.rst
similarity index 100%
rename from docs/guide/conventions.rst
rename to docs/howto/conventions.rst
diff --git a/docs/howto/dataset.rst b/docs/howto/dataset.rst
new file mode 100644
index 00000000..2b5267e7
--- /dev/null
+++ b/docs/howto/dataset.rst
@@ -0,0 +1,156 @@
+.. _dataset_note:
+   
+How to prepare training dataset
+===============================
+
+What NequIP does behind the scenes
+----------------------------------
+
+NequIP uses AtomicDataset class to store the atomic configurations. 
+During the initialization of an AtomicDataset object, 
+NequIP reads the atomic structures from the dataset, 
+computes the neighbor list and other data structures needed for the GNN 
+by converting raw data to a list of ``AtomicData`` objects.
+
+The computed results are then cached on disk in the ``root/processed_hashkey`` folder.
+The hashing is based on all the metadata provided for the dataset, 
+which includes the file name, the cutoff radius, the floating-point precision, etc.
+In the case where multiple training/evaluation runs use the same dataset,
+the neighbor list will only be computed in the first NequIP run.
+The later runs will directly load the AtomicDataset object from the cache file to save computation time.
+
+Note: be careful with the cached files. If you update your raw data file but keep using the same filename,
+NequIP will not automatically update the cached data.
+
+Key concepts
+------------
+
+fixed_fields
+~~~~~~~~~~~~
+Fixed fields are the quantities that are shared among all the configurations in the dataset.
+For example, if the dataset is a trajectory of an NVT MD simulation, the supercell size and the atomic species
+are a constant matrix/vector throughout the whole dataset.
+In this case, instead of repeating the same values many times,
+we specify the cell and species as fixed fields and only provide them once.
+
+yaml interface
+~~~~~~~~~~~~~~
+``nequip-train`` and ``nequip-evaluate`` automatically construct the AtomicDataset based on the yaml arguments.
+Later sections offer a couple different examples.
+
+If the training and validation datasets are from different raw files, the arguments for each set
+can be defined with ``dataset`` prefix and ``validation_dataset`` prefix, respectively.
+
+For example, ``dataset_file_name`` is used for training data and ``validation_dataset_file_name`` is for validation data.
+
+Python interface
+~~~~~~~~~~~~~~~~
+See ``nequip.data.dataset.AtomicInMemoryDataset``.
+
+Prepare dataset and specify in yaml config
+------------------------------------------
+
+ASE format
+~~~~~~~~~~
+
+NequIP accepts all formats that can be parsed by the `ase.io.read` function.
+We recommend `extxyz`.
+
+Example: Given an atomic data stored in "H2.extxyz" that looks like below:
+
+.. code:: extxyz
+
+   2
+   Properties=species:S:1:pos:R:3 energy=-10 user_label=2.0 pbc="F F F"
+   H       0.00000000       0.00000000       0.00000000
+   H       0.00000000       0.00000000       1.02000000
+
+The yaml input should be
+
+.. code:: yaml
+
+   dataset: ase
+   dataset_file_name: H2.extxyz
+   ase_args:
+     format: extxyz
+   include_keys:
+     - user_label
+   key_mapping:
+     user_label: label0
+   chemical_symbol_to_type:
+     H: 0
+
+For formats other than `extxyz`, be careful with the ase parsers; they may behave differently from the extxyz parser.
+For example, the ase vasp parser stores the potential energy in `free_energy` instead of `energy`.
+Because we optimize our code for the `extxyz` parser, NequIP will not be able to load any `total_energy` labels.
+We need some additional keys to help NequIP understand the situation.
+Here's an example for a vasp OUTCAR.
+
+.. code:: yaml
+
+   dataset: ase
+   dataset_file_name: OUTCAR
+   ase_args:
+     format: vasp-out
+   key_mapping:
+     free_energy: total_energy
+   chemical_symbol_to_type:
+     H: 0
+
+The workaround is to use key mapping; please see the note on key mapping below.
+
+NPZ format
+~~~~~~~~~~
+
+If your dataset consists of configurations that always have the same number of atoms, the npz data format can be an option.
+
+In the npz file, all the values should have as many rows as the number of configurations.
+For example, the force array of 36 atomic configurations of an N-atom system should have the shape of (36, N, 3);
+their total_energy array should have the shape of (36).
+
+Below is an example of the yaml specification.
+
+.. code:: yaml
+
+   dataset: npz
+   dataset_file_name: example.npz
+   include_keys:
+     - user_label1
+     - user_label2
+   npz_fixed_field_keys:
+     - cell
+     - atomic_numbers
+   key_mapping:
+     position: pos
+     force: forces
+     energy: total_energy
+     Z: atomic_numbers
+
+
+Note on key mapping
+~~~~~~~~~~~~~~~~~~~
+
+NequIP has default key names for energy, force, and cell (defined in ``nequip.data._keys``).
+Unlike in the ASE format, where this information is automatically parsed,
+in the npz data format the correct key names have to be provided.
+The common key names are: `total_energy`, `forces`, `atomic_numbers`, `pos`, `cell`, `pbc`.
+The ``key_mapping`` option converts a user-defined name (key) to the NequIP default name (value).
+
+
+Advanced options
+----------------
+
+skip frames during data processing
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The `include_frames` argument can be specified in yaml to skip certain frames in the raw datafile.
+The item has to be a list or a python iterable object.
+
+register user-defined graph, node, edge fields
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Graph, node, edge fields are quantities that belong to 
+the whole graph, each atom, each edge, respectively.
+Example graph fields include cell, pbc, and total_energy.
+Example node fields include pos, forces 
+
+To help NequIP to properly assemble the batch data, graph quantity other than 
+cell, pbc, total_energy should be registered.
diff --git a/docs/howto/howto.rst b/docs/howto/howto.rst
new file mode 100644
index 00000000..07e84e84
--- /dev/null
+++ b/docs/howto/howto.rst
@@ -0,0 +1,7 @@
+How-to Tutorials
+================
+
+ .. toctree::
+
+    dataset
+    migrate
diff --git a/docs/guide/migrate.rst b/docs/howto/migrate.rst
similarity index 100%
rename from docs/guide/migrate.rst
rename to docs/howto/migrate.rst
diff --git a/docs/index.rst b/docs/index.rst
index dc6ecd43..d2edd1a6 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -9,12 +9,20 @@ NequIP
 NequIP is an open-source package for creating, training, and using E(3)-equivariant machine learning interatomic potentials.
 
 .. toctree::
-   :maxdepth: 3
+   :maxdepth: 2
    :caption: Contents:
 
-   guide/guide
+   introduction/intro
+   cite
+   installation/install
+   yaml/yaml
+   howto/howto
+   faq/FAQ
+   commandline/commands
+   lammps/all
    options/options
    api/nequip
+   errors/errors
 
 
 
diff --git a/docs/installation/install.rst b/docs/installation/install.rst
new file mode 100644
index 00000000..3e946815
--- /dev/null
+++ b/docs/installation/install.rst
@@ -0,0 +1,39 @@
+Installation
+============
+
+NequIP requires:
+
+ * Python >= 3.6
+ * PyTorch >= 1.8, <=1.11.*. PyTorch can be installed following the `instructions from their documentation <https://pytorch.org/get-started/locally/>`_. Note that neither ``torchvision`` nor ``torchaudio``, included in the default install command, are needed for NequIP.
+
+To install:
+
+ * We use `Weights&Biases <https://wandb.ai>`_ to keep track of experiments. This is not a strict requirement — you can use our package without it — but it may make your life easier. If you want to use it, create an account `here <https://wandb.ai/login?signup=true>`_ and install the Python package::
+
+    pip install wandb
+
+ * Install the latest stable NequIP::
+
+    pip install https://github.com/mir-group/nequip/archive/main.zip
+
+To install previous versions of NequIP, please clone the repository from GitHub and check out the appropriate tag (for example ``v0.3.3`` for version 0.3.3).
+
+To install the current **unstable** development version of NequIP, please clone our repository and check out the ``develop`` branch.
+
+Installation Issues
+-------------------
+
+The easiest way to check if your installation is working is to train a *toy* model::
+
+    nequip-train configs/minimal.yaml
+
+If you suspect something is wrong, encounter errors, or just want to confirm that everything is in working order, you can also run the unit tests::
+
+    pip install pytest
+    pytest tests/unit/
+
+To run the full tests, including a set of longer/more intensive integration tests, run::
+
+    pytest tests/
+
+If a GPU is present, the unit tests will use it.
\ No newline at end of file
diff --git a/docs/introduction/intro.rst b/docs/introduction/intro.rst
new file mode 100644
index 00000000..e0dcc32c
--- /dev/null
+++ b/docs/introduction/intro.rst
@@ -0,0 +1,4 @@
+Overview
+========
+
+TODO
diff --git a/docs/lammps/all.rst b/docs/lammps/all.rst
new file mode 100644
index 00000000..9faac07e
--- /dev/null
+++ b/docs/lammps/all.rst
@@ -0,0 +1,7 @@
+Integration to LAMMPS, ASE
+==========================
+
+ .. toctree::
+
+    lammps
+    ase
diff --git a/docs/lammps/ase.rst b/docs/lammps/ase.rst
new file mode 100644
index 00000000..3729cde3
--- /dev/null
+++ b/docs/lammps/ase.rst
@@ -0,0 +1,2 @@
+ASE
+===
diff --git a/docs/lammps/lammps.rst b/docs/lammps/lammps.rst
new file mode 100644
index 00000000..f9d0ba9f
--- /dev/null
+++ b/docs/lammps/lammps.rst
@@ -0,0 +1,2 @@
+LAMMPS
+======
diff --git a/docs/options/dataset.rst b/docs/options/dataset.rst
index 54b39fc9..f3ca194c 100644
--- a/docs/options/dataset.rst
+++ b/docs/options/dataset.rst
@@ -33,7 +33,7 @@ key_mapping
     | Type: dict
     | Default: ``{'positions': 'pos', 'energy': 'total_energy', 'force': 'forces', 'forces': 'forces', 'Z': 'atomic_numbers', 'atomic_number': 'atomic_numbers'}``
 
-npz_keys
+include_keys
 ^^^^^^^^
     | Type: list
     | Default: ``[]``
@@ -68,5 +68,11 @@ include_frames
     | Type: NoneType
     | Default: ``None``
 
+ase_args
+^^^^^^^^
+    | Type: dict
+    | Default: ``{}``
+
 Advanced
---------
\ No newline at end of file
+--------
+See tutorial on :ref:`dataset_note`.
diff --git a/docs/yaml/yaml.rst b/docs/yaml/yaml.rst
new file mode 100644
index 00000000..fd804436
--- /dev/null
+++ b/docs/yaml/yaml.rst
@@ -0,0 +1,4 @@
+YAML input
+==========
+
+TODO
diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py
index 2b2279d9..c38b8eae 100644
--- a/nequip/data/dataset.py
+++ b/nequip/data/dataset.py
@@ -641,7 +641,7 @@ class NpzDataset(AtomicInMemoryDataset):
     """Load data from an npz file.
 
     To avoid loading unneeded data, keys are ignored by default unless they are in ``key_mapping``, ``include_keys``,
-    ``npz_fixed_fields`` or ``extra_fixed_fields``.
+    ``npz_fixed_field_keys`` or ``extra_fixed_fields``.
 
     Args:
         key_mapping (Dict[str, str]): mapping of npz keys to ``AtomicData`` keys. Optional
diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py
index 7aafd5fc..4d7686e1 100644
--- a/nequip/scripts/deploy.py
+++ b/nequip/scripts/deploy.py
@@ -129,7 +129,7 @@ def load_deployed_model(
 
 def main(args=None):
     parser = argparse.ArgumentParser(
-        description="Create and view information about deployed NequIP potentials."
+        description="Deploy and view information about previously deployed NequIP models."
     )
     # backward compat for 3.6
     if sys.version_info[1] > 6:
diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py
index 7c4e2abc..f7dfa12b 100644
--- a/nequip/scripts/evaluate.py
+++ b/nequip/scripts/evaluate.py
@@ -30,13 +30,13 @@ def main(args=None, running_as_script: bool = True):
         description=textwrap.dedent(
             """Compute the error of a model on a test set using various metrics.
 
-            The model, metrics, dataset, etc. can specified individually, or a training session can be indicated with `--train-dir`.
+            The model, metrics, dataset, etc. can specified in individual YAML config files, or a training session can be indicated with `--train-dir`.
             In order of priority, the global settings (dtype, TensorFloat32, etc.) are taken from:
-              1. The model config (for a training session)
-              2. The dataset config (for a deployed model)
-              3. The defaults
+              (1) the model config (for a training session),
+              (2) the dataset config (for a deployed model),
+              or (3) the defaults.
 
-            Prints only the final result in `name = num` format to stdout; all other information is logging.debuged to stderr.
+            Prints only the final result in `name = num` format to stdout; all other information is `logging.debug`ed to stderr.
 
             WARNING: Please note that results of CUDA models are rarely exactly reproducible, and that even CPU models can be nondeterministic.
             """
diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py
index c6aa7785..88b55f7e 100644
--- a/nequip/scripts/train.py
+++ b/nequip/scripts/train.py
@@ -81,8 +81,12 @@ def main(args=None, running_as_script: bool = True):
 
 
 def parse_command_line(args=None):
-    parser = argparse.ArgumentParser(description="Train a NequIP model.")
-    parser.add_argument("config", help="configuration file")
+    parser = argparse.ArgumentParser(
+        description="Train (or restart training of) a NequIP model."
+    )
+    parser.add_argument(
+        "config", help="YAML file configuring the model, dataset, and other options"
+    )
     parser.add_argument(
         "--equivariance-test",
         help="test the model's equivariance before training on n (default 1) random frames from the dataset",

From 89977eab75dbc594dc3f7751b77f7f833a0df66f Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Wed, 7 Dec 2022 14:22:57 -0500
Subject: [PATCH 33/50] Revert "Better documentation and guidance (#102)"

This reverts commit 6ae6710aa7ae145f2d0b6917671f7a53c23975ec.
---
 configs/full.yaml                         |   9 +-
 docs/api/nequip.rst                       |   3 +-
 docs/api/trainer.rst                      |  10 --
 docs/cite.rst                             |   3 -
 docs/commandline/commands.rst             | 132 ------------------
 docs/faq/FAQ.rst                          |  14 --
 docs/{errors/errors.rst => guide/FAQ.rst} |  13 +-
 docs/{howto => guide}/conventions.rst     |   0
 docs/guide/guide.rst                      |   9 ++
 docs/guide/intro.rst                      |   4 +
 docs/guide/irreps.rst                     |   9 ++
 docs/{howto => guide}/migrate.rst         |   0
 docs/howto/dataset.rst                    | 156 ----------------------
 docs/howto/howto.rst                      |   7 -
 docs/index.rst                            |  12 +-
 docs/installation/install.rst             |  39 ------
 docs/introduction/intro.rst               |   4 -
 docs/lammps/all.rst                       |   7 -
 docs/lammps/ase.rst                       |   2 -
 docs/lammps/lammps.rst                    |   2 -
 docs/options/dataset.rst                  |  10 +-
 docs/yaml/yaml.rst                        |   4 -
 nequip/data/dataset.py                    |   2 +-
 nequip/scripts/deploy.py                  |   2 +-
 nequip/scripts/evaluate.py                |  10 +-
 nequip/scripts/train.py                   |   8 +-
 26 files changed, 50 insertions(+), 421 deletions(-)
 delete mode 100644 docs/api/trainer.rst
 delete mode 100644 docs/cite.rst
 delete mode 100644 docs/commandline/commands.rst
 delete mode 100644 docs/faq/FAQ.rst
 rename docs/{errors/errors.rst => guide/FAQ.rst} (56%)
 rename docs/{howto => guide}/conventions.rst (100%)
 create mode 100644 docs/guide/guide.rst
 create mode 100644 docs/guide/intro.rst
 create mode 100644 docs/guide/irreps.rst
 rename docs/{howto => guide}/migrate.rst (100%)
 delete mode 100644 docs/howto/dataset.rst
 delete mode 100644 docs/howto/howto.rst
 delete mode 100644 docs/installation/install.rst
 delete mode 100644 docs/introduction/intro.rst
 delete mode 100644 docs/lammps/all.rst
 delete mode 100644 docs/lammps/ase.rst
 delete mode 100644 docs/lammps/lammps.rst
 delete mode 100644 docs/yaml/yaml.rst

diff --git a/configs/full.yaml b/configs/full.yaml
index 2f98164e..d310c70a 100644
--- a/configs/full.yaml
+++ b/configs/full.yaml
@@ -211,8 +211,6 @@ loss_coeffs:
   total_energy:                                                                    
     - 1
     - PerAtomMSELoss
-# note that the ratio between force and energy loss matters for the training process. One may consider using 1:1 with the PerAtomMSELoss. If the energy loss still significantly dominate the loss function at the initial epochs, tune the energy loss weight lower helps the training a lot.
-
 
 # # default loss function is MSELoss, the name has to be exactly the same as those in torch.nn.
 # the only supprted targets are forces and total_energy
@@ -344,10 +342,9 @@ global_rescale_scale_trainable: false
 # global_rescale_shift_trainable: false
 # global_rescale_scale: dataset_forces_rms
 # global_rescale_scale_trainable: false
-# per_species_rescale_shifts_trainable: false
-# per_species_rescale_scales_trainable: true
-# per_species_rescale_shifts: dataset_per_species_total_energy_mean
-# per_species_rescale_scales: dataset_per_species_forces_rms
+# per_species_rescale_trainable: true
+# per_species_rescale_shifts: dataset_per_atom_total_energy_mean
+# per_species_rescale_scales: dataset_per_atom_total_energy_std
 
 # # full block needed for global rescale
 # global_rescale_shift: dataset_total_energy_mean
diff --git a/docs/api/nequip.rst b/docs/api/nequip.rst
index 6f6250cf..13bc37ca 100644
--- a/docs/api/nequip.rst
+++ b/docs/api/nequip.rst
@@ -3,5 +3,4 @@ Python API
 
  .. toctree::
 
-    data
-    trainer
+    data
\ No newline at end of file
diff --git a/docs/api/trainer.rst b/docs/api/trainer.rst
deleted file mode 100644
index 983e6f6b..00000000
--- a/docs/api/trainer.rst
+++ /dev/null
@@ -1,10 +0,0 @@
-nequip.trainer
-==============
- 
- .. automodule:: nequip.train.trainer
-    :members:
-    :imported-members:
-
- .. automodule:: nequip.train.trainer_wandb
-    :members:
-    :imported-members:
diff --git a/docs/cite.rst b/docs/cite.rst
deleted file mode 100644
index 9f8296cc..00000000
--- a/docs/cite.rst
+++ /dev/null
@@ -1,3 +0,0 @@
-Citing Nequip
-=============
-
diff --git a/docs/commandline/commands.rst b/docs/commandline/commands.rst
deleted file mode 100644
index b58c87ab..00000000
--- a/docs/commandline/commands.rst
+++ /dev/null
@@ -1,132 +0,0 @@
-Command-line Executables
-========================
-
-``nequip-train``
-----------------
-
- .. code ::
-
-    usage: nequip-train [-h] [--equivariance-test] [--model-debug-mode] [--grad-anomaly-mode] [--log LOG] config
-
-Train (or restart training of) a NequIP model.
-
-positional arguments:
-  config               YAML file configuring the model, dataset, and other options
-
-optional arguments:
-  -h, --help           show this help message and exit
-  --equivariance-test  test the model's equivariance before training
-  --model-debug-mode   enable model debug mode, which can sometimes give much more useful error messages at the
-                       cost of some speed. Do not use for production training!
-  --grad-anomaly-mode  enable PyTorch autograd anomaly mode to debug NaN gradients. Do not use for production
-                       training!
-  --log LOG            log file to store all the screen logging
-
-``nequip-evaluate``
--------------------
-
- .. code ::
-
-    usage: nequip-evaluate [-h] [--train-dir TRAIN_DIR] [--model MODEL] [--dataset-config DATASET_CONFIG]
-                        [--metrics-config METRICS_CONFIG] [--test-indexes TEST_INDEXES] [--batch-size BATCH_SIZE]
-                        [--device DEVICE] [--output OUTPUT] [--log LOG]
-
-Compute the error of a model on a test set using various metrics. The model, metrics, dataset, etc. can specified
-in individual YAML config files, or a training session can be indicated with ``--train-dir``. In order of priority,
-the global settings (dtype, TensorFloat32, etc.) are taken from: (1) the model config (for a training session), (2)
-the dataset config (for a deployed model), or (3) the defaults. Prints only the final result in ``name = num`` format
-to stdout; all other information is ``logging.debug``ed to stderr. WARNING: Please note that results of CUDA models
-are rarely exactly reproducible, and that even CPU models can be nondeterministic.
-
-optional arguments:
-  -h, --help            show this help message and exit
-  --train-dir TRAIN_DIR
-                        Path to a working directory from a training session.
-  --model MODEL         A deployed or pickled NequIP model to load. If omitted, defaults to `best_model.pth` in
-                        `train_dir`.
-  --dataset-config DATASET_CONFIG
-                        A YAML config file specifying the dataset to load test data from. If omitted, `config.yaml`
-                        in `train_dir` will be used
-  --metrics-config METRICS_CONFIG
-                        A YAML config file specifying the metrics to compute. If omitted, `config.yaml` in
-                        `train_dir` will be used. If the config does not specify `metrics_components`, the default
-                        is to logging.debug MAEs and RMSEs for all fields given in the loss function. If the
-                        literal string `None`, no metrics will be computed.
-  --test-indexes TEST_INDEXES
-                        Path to a file containing the indexes in the dataset that make up the test set. If omitted,
-                        all data frames *not* used as training or validation data in the training session
-                        `train_dir` will be used.
-  --batch-size BATCH_SIZE
-                        Batch size to use. Larger is usually faster on GPU.
-  --device DEVICE       Device to run the model on. If not provided, defaults to CUDA if available and CPU
-                        otherwise.
-  --output OUTPUT       XYZ file to write out the test set and model predicted forces, energies, etc. to.
-  --log LOG             log file to store all the metrics and screen logging.debug
-
-``nequip-deploy``
------------------
-
- .. code ::
-
-    usage: nequip-deploy [-h] {info,build} ...
-
-Deploy and view information about previously deployed NequIP models.
-
-optional arguments:
-  -h, --help    show this help message and exit
-
-commands:
-  {info,build}
-    info        Get information from a deployed model file
-    build       Build a deployment model
-
-``nequip-deploy info``
-~~~~~~~~~~~~~~~~~~~~~~
-
- .. code ::
-
-    usage: nequip-deploy info [-h] model_path
-
-positional arguments:
-  model_path  Path to a deployed model file.
-
-optional arguments:
-  -h, --help  show this help message and exit
-
-
-``nequip-deploy build``
-~~~~~~~~~~~~~~~~~~~~~~~
-
- .. code ::
-
-    usage: nequip-deploy build [-h] train_dir out_file
-
-positional arguments:
-  train_dir   Path to a working directory from a training session.
-  out_file    Output file for deployed model.
-
-optional arguments:
-  -h, --help  show this help message and exit
-
-
-``nequip-benchmark``
---------------------
-
- .. code ::
-
-    usage: nequip-benchmark [-h] [--profile PROFILE] [--device DEVICE] [-n N] [--n-data N_DATA] [--timestep TIMESTEP]
-                            config
-
-Benchmark the approximate MD performance of a given model configuration / dataset pair.
-
-positional arguments:
-  config               configuration file
-
-optional arguments:
-  -h, --help           show this help message and exit
-  --profile PROFILE    Profile instead of timing, creating and outputing a Chrome trace JSON to the given path.
-  --device DEVICE      Device to run the model on. If not provided, defaults to CUDA if available and CPU
-                       otherwise.
-  -n N                 Number of trials.
-  --n-data N_DATA      Number of frames to use.
-  --timestep TIMESTEP  MD timestep for ns/day esimation, in fs. Defauts to 1fs.
diff --git a/docs/faq/FAQ.rst b/docs/faq/FAQ.rst
deleted file mode 100644
index 411e77c1..00000000
--- a/docs/faq/FAQ.rst
+++ /dev/null
@@ -1,14 +0,0 @@
-FAQ
-===
-
-How do I...
------------
-
-... continue to train a model that reached a stopping condition?
-    There will be an answer here.
-
-1. Reload the model trained with version 0.3.3 to the code in 0.4.
-   check out the migration note at :ref:`migration_note`.
-
-2. Specify my dataset for `nequip-train` and `nequip-eval`, see :ref:`_dataset_note`.
-
diff --git a/docs/errors/errors.rst b/docs/guide/FAQ.rst
similarity index 56%
rename from docs/errors/errors.rst
rename to docs/guide/FAQ.rst
index 576e553d..92ac758e 100644
--- a/docs/errors/errors.rst
+++ b/docs/guide/FAQ.rst
@@ -1,5 +1,14 @@
-Errors
-======
+FAQ
+===
+
+How do I...
+-----------
+
+... continue to train a model that reached a stopping condition?
+    There will be an answer here.
+
+1. Reload the model trained with version 0.3.3 to the code in 0.4.
+   check out the migration note at :ref:`migration_note`.
 
 Common errors
 -------------
diff --git a/docs/howto/conventions.rst b/docs/guide/conventions.rst
similarity index 100%
rename from docs/howto/conventions.rst
rename to docs/guide/conventions.rst
diff --git a/docs/guide/guide.rst b/docs/guide/guide.rst
new file mode 100644
index 00000000..6def3859
--- /dev/null
+++ b/docs/guide/guide.rst
@@ -0,0 +1,9 @@
+NequIP User Guide
+=================
+
+ .. toctree::
+
+    intro
+    irreps
+    conventions
+    FAQ
\ No newline at end of file
diff --git a/docs/guide/intro.rst b/docs/guide/intro.rst
new file mode 100644
index 00000000..7afa4132
--- /dev/null
+++ b/docs/guide/intro.rst
@@ -0,0 +1,4 @@
+Tutorial: Introduction to NequIP
+================================
+
+TODO
\ No newline at end of file
diff --git a/docs/guide/irreps.rst b/docs/guide/irreps.rst
new file mode 100644
index 00000000..5f9b2735
--- /dev/null
+++ b/docs/guide/irreps.rst
@@ -0,0 +1,9 @@
+Irreps
+======
+
+.. _Irreps:
+
+Syntax to specify irreps
+------------------------
+
+TODO: descripe irreps syntax here
\ No newline at end of file
diff --git a/docs/howto/migrate.rst b/docs/guide/migrate.rst
similarity index 100%
rename from docs/howto/migrate.rst
rename to docs/guide/migrate.rst
diff --git a/docs/howto/dataset.rst b/docs/howto/dataset.rst
deleted file mode 100644
index 2b5267e7..00000000
--- a/docs/howto/dataset.rst
+++ /dev/null
@@ -1,156 +0,0 @@
-.. _dataset_note:
-   
-How to prepare training dataset
-===============================
-
-What does NequIP behind the scene
----------------------------------
-
-NequIP uses AtomicDataset class to store the atomic configurations. 
-During the initialization of an AtomicDataset object, 
-NequIP reads the atomic structures from the dataset, 
-computes the neighbor list and other data structures needed for the GNN 
-by converting raw data to a list of ``AtomicData`` objects.
-
-The computed results are then cached on harddisk ``root/processed_hashkey`` folder.
-The hashing is based on all the metadata provided for the dataset, 
-which includes the file name, the cutoff radius, float number precision and etc.
-In the case where multiple training/evaluation runs use the same dataset,
-the neighbor list will only be computed in the first NequIP run.
-The later runs will directly load the AtomicDataset object from the cache file to save computation time.
-
-Note: be careful to the cached file. If you update your raw data file but keep using the same filename,
-NequIP will not automatically update the cached data.
-
-Key concepts
-------------
-
-fixed_fields
-~~~~~~~~~~~~
-Fixed fields are the quantities that are shared among all the configurations in the dataset.
-For example, if the dataset is a trajectory of an NVT MD simulation, the super cell size and the atomic species 
-are indeed a constant matrix/vector through out the whole dataset.
-In this case, in stead of repeating the same values for many times, 
-we specify the cell and species as fixed fields and only provide them once.
-
-yaml interface
-~~~~~~~~~~~~~~
-``nequip-train`` and ``nequip-evaluate`` automatically construct the AtomicDataset based on the yaml arguments.
-Later sections offer a couple different examples.
-
-If the training and validation datasets are from different raw files, the arguments for each set
-can be defined with ``dataset`` prefix and ``validation_dataset`` prefix, respectively.
-
-For example, ``dataset_file_name`` is used for training data and ``validation_dataset_file_name`` is for validation data.
-
-Python interface
-~~~~~~~~~~~~~~~~
-See ``nequip.data.dataset.AtomicInMemoryDataset``.
-
-Prepare dataset and specify in yaml config
-------------------------------------------
-
-ASE format
-~~~~~~~~~~
-
-NequIP accept all format that can be parsed by `ase.io.read` function. 
-We recommend `extxyz`.
-
-Example: Given an atomic data stored in "H2.extxyz" that looks like below:
-
-.. code:: extxyz
-
-   2
-   Properties=species:S:1:pos:R:3 energy=-10 user_label=2.0 pbc="F F F"
-   H       0.00000000       0.00000000       0.00000000
-   H       0.00000000       0.00000000       1.02000000
-
-The yaml input should be
-
-.. code:: yaml
-
-   dataset: ase
-   dataset_file_name: H2.extxyz
-   ase_args:
-   format: extxyz
-   include_keys:
-     - user_label
-   key_mapping:
-     user_label: label0
-   chemical_symbol_to_type:
-     H: 0
-
-For other formats than `extxyz`, be careful to the ase parsers; they may have different behavior from the extxyz parser.
-For example, the ase vasp parser store potential energy to `free_energy` instead of `energy`.
-Because we optimize our code to the `extxyz` parser, NequIP will not be able to load any `total_energy` labels.
-We need some additional keys to help NequIP to understand the situtaion
-Here's an example for vasp outcar. 
-
-.. code:: yaml
-
-   dataset: ase
-   dataset_file_name: OUTCAR
-   ase_args:
-     format: vasp-out
-   key_mapping:
-     free_energy: total_energy
-   chemical_symbol_to_type:
-     H: 0
-
-The way around is to use key mapping, please see more note below.
-
-NPZ format
-~~~~~~~~~~
-
-If your dataset constitute configurations that always have the same number of atoms, npz data format can be an option.
-
-In the npz file, all the values should have the same row as the number of the configurations. 
-For example, the force array of 36 atomic configurations of an N-atom system should have the shape of (36, N, 3);
-their total_energy array should have the shape of (36).
-
-Below is an example of the yaml specification.
-
-.. code:: yaml
-
-   dataset: npz
-   dataset_file_name: example.npz
-   include_keys:
-     - user_label1
-     - user_label2
-   npz_fixed_field_keys:
-     - cell
-     - atomic_numbers
-   key_mapping:
-     position: pos
-     force: forces
-     energy: total_energy
-     Z: atomic_numbers
-
-
-Note on key mapping
-~~~~~~~~~~~~~~~~~~~
-
-NequIP has default key names for energy, force, cell (defined at nequip.data._keys)
-Unlike in the ASE format where these information is automatically parsed,
-in the npz data format, the correct key names have to be provided.
-The common key names are: `total_energy`, `forces`, `atomic_numbers`, `pos`, `cell`, `pbc`.
-the key_mapping can help to convert the user defined name (key) to NequIP default name (value).
-
-
-Advanced options
-----------------
-
-skip frames during data processing
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The `include_frame` argument can be specified in yaml to skip certain frames in the raw datafile.
-The item has to be a list or a python iteratable object.
-
-register user-defined graph, node, edge fields
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Graph, node, edge fields are quantities that belong to 
-the whole graph, each atom, each edge, respectively.
-Example graph fields include cell, pbc, and total_energy.
-Example node fields include pos, forces 
-
-To help NequIP to properly assemble the batch data, graph quantity other than 
-cell, pbc, total_energy should be registered.
diff --git a/docs/howto/howto.rst b/docs/howto/howto.rst
deleted file mode 100644
index 07e84e84..00000000
--- a/docs/howto/howto.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-How-to Tutorials
-================
-
- .. toctree::
-
-    dataset
-    migrate
diff --git a/docs/index.rst b/docs/index.rst
index d2edd1a6..dc6ecd43 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -9,20 +9,12 @@ NequIP
 NequIP is an open-source package for creating, training, and using E(3)-equivariant machine learning interatomic potentials.
 
 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 3
    :caption: Contents:
 
-   introduction/intro
-   cite
-   installation/install
-   yaml/yaml
-   howto/howto
-   faq/FAQ
-   commandline/commands
-   lammps/all
+   guide/guide
    options/options
    api/nequip
-   errors/errors
 
 
 
diff --git a/docs/installation/install.rst b/docs/installation/install.rst
deleted file mode 100644
index 3e946815..00000000
--- a/docs/installation/install.rst
+++ /dev/null
@@ -1,39 +0,0 @@
-Installation
-============
-
-NequIP requires:
-
- * Python >= 3.6
- * PyTorch >= 1.8, <=1.11.*. PyTorch can be installed following the `instructions from their documentation <https://pytorch.org/get-started/locally/>`_. Note that neither ``torchvision`` nor ``torchaudio``, included in the default install command, are needed for NequIP.
-
-To install:
-
- * We use `Weights&Biases <https://wandb.ai>`_ to keep track of experiments. This is not a strict requirement — you can use our package without it — but it may make your life easier. If you want to use it, create an account `here <https://wandb.ai/login?signup=true>`_ and install the Python package::
-
-    pip install wandb
-
- * Install the latest stable NequIP::
-
-    pip install https://github.com/mir-group/nequip/archive/main.zip
-
-To install previous versions of NequIP, please clone the repository from GitHub and check out the appropriate tag (for example ``v0.3.3`` for version 0.3.3).
-
-To install the current **unstable** development version of NequIP, please clone our repository and check out the ``develop`` branch.
-
-Installation Issues
--------------------
-
-The easiest way to check if your installation is working is to train a _toy_ model::
-
-    nequip-train configs/minimal.yaml
-
-If you suspect something is wrong, encounter errors, or just want to confirm that everything is in working order, you can also run the unit tests::
-
-    pip install pytest
-    pytest tests/unit/
-
-To run the full tests, including a set of longer/more intensive integration tests, run::
-
-    pytest tests/
-
-If a GPU is present, the unit tests will use it.
\ No newline at end of file
diff --git a/docs/introduction/intro.rst b/docs/introduction/intro.rst
deleted file mode 100644
index e0dcc32c..00000000
--- a/docs/introduction/intro.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-Overview
-========
-
-TODO
diff --git a/docs/lammps/all.rst b/docs/lammps/all.rst
deleted file mode 100644
index 9faac07e..00000000
--- a/docs/lammps/all.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Integration to LAMMPS, ASE
-==========================
-
- .. toctree::
-
-    lammps
-    ase
diff --git a/docs/lammps/ase.rst b/docs/lammps/ase.rst
deleted file mode 100644
index 3729cde3..00000000
--- a/docs/lammps/ase.rst
+++ /dev/null
@@ -1,2 +0,0 @@
-ASE
-===
diff --git a/docs/lammps/lammps.rst b/docs/lammps/lammps.rst
deleted file mode 100644
index f9d0ba9f..00000000
--- a/docs/lammps/lammps.rst
+++ /dev/null
@@ -1,2 +0,0 @@
-LAMMPS
-======
diff --git a/docs/options/dataset.rst b/docs/options/dataset.rst
index f3ca194c..54b39fc9 100644
--- a/docs/options/dataset.rst
+++ b/docs/options/dataset.rst
@@ -33,7 +33,7 @@ key_mapping
     | Type: dict
     | Default: ``{'positions': 'pos', 'energy': 'total_energy', 'force': 'forces', 'forces': 'forces', 'Z': 'atomic_numbers', 'atomic_number': 'atomic_numbers'}``
 
-include_keys
+npz_keys
 ^^^^^^^^
     | Type: list
     | Default: ``[]``
@@ -68,11 +68,5 @@ include_frames
     | Type: NoneType
     | Default: ``None``
 
-ase_args
-^^^^^^^^
-    | Type: dict
-    | Default: ``{}``
-
 Advanced
---------
-See tutorial on :ref:`../guide/_dataset_note`.
+--------
\ No newline at end of file
diff --git a/docs/yaml/yaml.rst b/docs/yaml/yaml.rst
deleted file mode 100644
index fd804436..00000000
--- a/docs/yaml/yaml.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-YAML input
-==========
-
-TODO
diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py
index c38b8eae..2b2279d9 100644
--- a/nequip/data/dataset.py
+++ b/nequip/data/dataset.py
@@ -641,7 +641,7 @@ class NpzDataset(AtomicInMemoryDataset):
     """Load data from an npz file.
 
     To avoid loading unneeded data, keys are ignored by default unless they are in ``key_mapping``, ``include_keys``,
-    ``npz_fixed_fields_keys`` or ``extra_fixed_fields``.
+    ``npz_fixed_fields`` or ``extra_fixed_fields``.
 
     Args:
         key_mapping (Dict[str, str]): mapping of npz keys to ``AtomicData`` keys. Optional
diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py
index 4d7686e1..7aafd5fc 100644
--- a/nequip/scripts/deploy.py
+++ b/nequip/scripts/deploy.py
@@ -129,7 +129,7 @@ def load_deployed_model(
 
 def main(args=None):
     parser = argparse.ArgumentParser(
-        description="Deploy and view information about previously deployed NequIP models."
+        description="Create and view information about deployed NequIP potentials."
     )
     # backward compat for 3.6
     if sys.version_info[1] > 6:
diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py
index f7dfa12b..7c4e2abc 100644
--- a/nequip/scripts/evaluate.py
+++ b/nequip/scripts/evaluate.py
@@ -30,13 +30,13 @@ def main(args=None, running_as_script: bool = True):
         description=textwrap.dedent(
             """Compute the error of a model on a test set using various metrics.
 
-            The model, metrics, dataset, etc. can specified in individual YAML config files, or a training session can be indicated with `--train-dir`.
+            The model, metrics, dataset, etc. can specified individually, or a training session can be indicated with `--train-dir`.
             In order of priority, the global settings (dtype, TensorFloat32, etc.) are taken from:
-              (1) the model config (for a training session),
-              (2) the dataset config (for a deployed model),
-              or (3) the defaults.
+              1. The model config (for a training session)
+              2. The dataset config (for a deployed model)
+              3. The defaults
 
-            Prints only the final result in `name = num` format to stdout; all other information is `logging.debug`ed to stderr.
+            Prints only the final result in `name = num` format to stdout; all other information is logging.debuged to stderr.
 
             WARNING: Please note that results of CUDA models are rarely exactly reproducible, and that even CPU models can be nondeterministic.
             """
diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py
index 88b55f7e..c6aa7785 100644
--- a/nequip/scripts/train.py
+++ b/nequip/scripts/train.py
@@ -81,12 +81,8 @@ def main(args=None, running_as_script: bool = True):
 
 
 def parse_command_line(args=None):
-    parser = argparse.ArgumentParser(
-        description="Train (or restart training of) a NequIP model."
-    )
-    parser.add_argument(
-        "config", help="YAML file configuring the model, dataset, and other options"
-    )
+    parser = argparse.ArgumentParser(description="Train a NequIP model.")
+    parser.add_argument("config", help="configuration file")
     parser.add_argument(
         "--equivariance-test",
         help="test the model's equivariance before training on n (default 1) random frames from the dataset",

From 2b5eefb1891a8580ea7ec646bf71828e9ae188f2 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Wed, 7 Dec 2022 14:24:05 -0500
Subject: [PATCH 34/50] Revert "Revert "Better documentation and guidance
 (#102)""

This reverts commit 89977eab75dbc594dc3f7751b77f7f833a0df66f.
---
 configs/full.yaml                         |   9 +-
 docs/api/nequip.rst                       |   3 +-
 docs/api/trainer.rst                      |  10 ++
 docs/cite.rst                             |   3 +
 docs/commandline/commands.rst             | 132 ++++++++++++++++++
 docs/{guide/FAQ.rst => errors/errors.rst} |  13 +-
 docs/faq/FAQ.rst                          |  14 ++
 docs/guide/guide.rst                      |   9 --
 docs/guide/intro.rst                      |   4 -
 docs/guide/irreps.rst                     |   9 --
 docs/{guide => howto}/conventions.rst     |   0
 docs/howto/dataset.rst                    | 156 ++++++++++++++++++++++
 docs/howto/howto.rst                      |   7 +
 docs/{guide => howto}/migrate.rst         |   0
 docs/index.rst                            |  12 +-
 docs/installation/install.rst             |  39 ++++++
 docs/introduction/intro.rst               |   4 +
 docs/lammps/all.rst                       |   7 +
 docs/lammps/ase.rst                       |   2 +
 docs/lammps/lammps.rst                    |   2 +
 docs/options/dataset.rst                  |  10 +-
 docs/yaml/yaml.rst                        |   4 +
 nequip/data/dataset.py                    |   2 +-
 nequip/scripts/deploy.py                  |   2 +-
 nequip/scripts/evaluate.py                |  10 +-
 nequip/scripts/train.py                   |   8 +-
 26 files changed, 421 insertions(+), 50 deletions(-)
 create mode 100644 docs/api/trainer.rst
 create mode 100644 docs/cite.rst
 create mode 100644 docs/commandline/commands.rst
 rename docs/{guide/FAQ.rst => errors/errors.rst} (56%)
 create mode 100644 docs/faq/FAQ.rst
 delete mode 100644 docs/guide/guide.rst
 delete mode 100644 docs/guide/intro.rst
 delete mode 100644 docs/guide/irreps.rst
 rename docs/{guide => howto}/conventions.rst (100%)
 create mode 100644 docs/howto/dataset.rst
 create mode 100644 docs/howto/howto.rst
 rename docs/{guide => howto}/migrate.rst (100%)
 create mode 100644 docs/installation/install.rst
 create mode 100644 docs/introduction/intro.rst
 create mode 100644 docs/lammps/all.rst
 create mode 100644 docs/lammps/ase.rst
 create mode 100644 docs/lammps/lammps.rst
 create mode 100644 docs/yaml/yaml.rst

diff --git a/configs/full.yaml b/configs/full.yaml
index d310c70a..2f98164e 100644
--- a/configs/full.yaml
+++ b/configs/full.yaml
@@ -211,6 +211,8 @@ loss_coeffs:
   total_energy:                                                                    
     - 1
     - PerAtomMSELoss
+# note that the ratio between force and energy loss matters for the training process. One may consider using 1:1 with the PerAtomMSELoss. If the energy loss still significantly dominate the loss function at the initial epochs, tune the energy loss weight lower helps the training a lot.
+
 
 # # default loss function is MSELoss, the name has to be exactly the same as those in torch.nn.
 # the only supprted targets are forces and total_energy
@@ -342,9 +344,10 @@ global_rescale_scale_trainable: false
 # global_rescale_shift_trainable: false
 # global_rescale_scale: dataset_forces_rms
 # global_rescale_scale_trainable: false
-# per_species_rescale_trainable: true
-# per_species_rescale_shifts: dataset_per_atom_total_energy_mean
-# per_species_rescale_scales: dataset_per_atom_total_energy_std
+# per_species_rescale_shifts_trainable: false
+# per_species_rescale_scales_trainable: true
+# per_species_rescale_shifts: dataset_per_species_total_energy_mean
+# per_species_rescale_scales: dataset_per_species_forces_rms
 
 # # full block needed for global rescale
 # global_rescale_shift: dataset_total_energy_mean
diff --git a/docs/api/nequip.rst b/docs/api/nequip.rst
index 13bc37ca..6f6250cf 100644
--- a/docs/api/nequip.rst
+++ b/docs/api/nequip.rst
@@ -3,4 +3,5 @@ Python API
 
  .. toctree::
 
-    data
\ No newline at end of file
+    data
+    trainer
diff --git a/docs/api/trainer.rst b/docs/api/trainer.rst
new file mode 100644
index 00000000..983e6f6b
--- /dev/null
+++ b/docs/api/trainer.rst
@@ -0,0 +1,10 @@
+nequip.trainer
+==============
+ 
+ .. automodule:: nequip.train.trainer
+    :members:
+    :imported-members:
+
+ .. automodule:: nequip.train.trainer_wandb
+    :members:
+    :imported-members:
diff --git a/docs/cite.rst b/docs/cite.rst
new file mode 100644
index 00000000..9f8296cc
--- /dev/null
+++ b/docs/cite.rst
@@ -0,0 +1,3 @@
+Citing Nequip
+=============
+
diff --git a/docs/commandline/commands.rst b/docs/commandline/commands.rst
new file mode 100644
index 00000000..b58c87ab
--- /dev/null
+++ b/docs/commandline/commands.rst
@@ -0,0 +1,132 @@
+Command-line Executables
+========================
+
+``nequip-train``
+----------------
+
+ .. code ::
+
+    usage: nequip-train [-h] [--equivariance-test] [--model-debug-mode] [--grad-anomaly-mode] [--log LOG] config
+
+Train (or restart training of) a NequIP model.
+
+positional arguments:
+  config               YAML file configuring the model, dataset, and other options
+
+optional arguments:
+  -h, --help           show this help message and exit
+  --equivariance-test  test the model's equivariance before training
+  --model-debug-mode   enable model debug mode, which can sometimes give much more useful error messages at the
+                       cost of some speed. Do not use for production training!
+  --grad-anomaly-mode  enable PyTorch autograd anomaly mode to debug NaN gradients. Do not use for production
+                       training!
+  --log LOG            log file to store all the screen logging
+
+``nequip-evaluate``
+-------------------
+
+ .. code ::
+
+    usage: nequip-evaluate [-h] [--train-dir TRAIN_DIR] [--model MODEL] [--dataset-config DATASET_CONFIG]
+                        [--metrics-config METRICS_CONFIG] [--test-indexes TEST_INDEXES] [--batch-size BATCH_SIZE]
+                        [--device DEVICE] [--output OUTPUT] [--log LOG]
+
+Compute the error of a model on a test set using various metrics. The model, metrics, dataset, etc. can be specified
+in individual YAML config files, or a training session can be indicated with ``--train-dir``. In order of priority,
+the global settings (dtype, TensorFloat32, etc.) are taken from: (1) the model config (for a training session), (2)
+the dataset config (for a deployed model), or (3) the defaults. Prints only the final result in ``name = num`` format
+to stdout; all other information is logged with ``logging.debug`` to stderr. WARNING: Please note that results of CUDA models
+are rarely exactly reproducible, and that even CPU models can be nondeterministic.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --train-dir TRAIN_DIR
+                        Path to a working directory from a training session.
+  --model MODEL         A deployed or pickled NequIP model to load. If omitted, defaults to `best_model.pth` in
+                        `train_dir`.
+  --dataset-config DATASET_CONFIG
+                        A YAML config file specifying the dataset to load test data from. If omitted, `config.yaml`
+                        in `train_dir` will be used
+  --metrics-config METRICS_CONFIG
+                        A YAML config file specifying the metrics to compute. If omitted, `config.yaml` in
+                        `train_dir` will be used. If the config does not specify `metrics_components`, the default
+                        is to logging.debug MAEs and RMSEs for all fields given in the loss function. If the
+                        literal string `None`, no metrics will be computed.
+  --test-indexes TEST_INDEXES
+                        Path to a file containing the indexes in the dataset that make up the test set. If omitted,
+                        all data frames *not* used as training or validation data in the training session
+                        `train_dir` will be used.
+  --batch-size BATCH_SIZE
+                        Batch size to use. Larger is usually faster on GPU.
+  --device DEVICE       Device to run the model on. If not provided, defaults to CUDA if available and CPU
+                        otherwise.
+  --output OUTPUT       XYZ file to write out the test set and model predicted forces, energies, etc. to.
+  --log LOG             log file to store all the metrics and screen logging.debug
+
+``nequip-deploy``
+-----------------
+
+ .. code ::
+
+    usage: nequip-deploy [-h] {info,build} ...
+
+Deploy and view information about previously deployed NequIP models.
+
+optional arguments:
+  -h, --help    show this help message and exit
+
+commands:
+  {info,build}
+    info        Get information from a deployed model file
+    build       Build a deployment model
+
+``nequip-deploy info``
+~~~~~~~~~~~~~~~~~~~~~~
+
+ .. code ::
+
+    usage: nequip-deploy info [-h] model_path
+
+positional arguments:
+  model_path  Path to a deployed model file.
+
+optional arguments:
+  -h, --help  show this help message and exit
+
+
+``nequip-deploy build``
+~~~~~~~~~~~~~~~~~~~~~~~
+
+ .. code ::
+
+    usage: nequip-deploy build [-h] train_dir out_file
+
+positional arguments:
+  train_dir   Path to a working directory from a training session.
+  out_file    Output file for deployed model.
+
+optional arguments:
+  -h, --help  show this help message and exit
+
+
+``nequip-benchmark``
+--------------------
+
+ .. code ::
+
+    usage: nequip-benchmark [-h] [--profile PROFILE] [--device DEVICE] [-n N] [--n-data N_DATA] [--timestep TIMESTEP]
+                            config
+
+Benchmark the approximate MD performance of a given model configuration / dataset pair.
+
+positional arguments:
+  config               configuration file
+
+optional arguments:
+  -h, --help           show this help message and exit
+  --profile PROFILE    Profile instead of timing, creating and outputting a Chrome trace JSON to the given path.
+  --device DEVICE      Device to run the model on. If not provided, defaults to CUDA if available and CPU
+                       otherwise.
+  -n N                 Number of trials.
+  --n-data N_DATA      Number of frames to use.
+  --timestep TIMESTEP  MD timestep for ns/day estimation, in fs. Defaults to 1fs.
diff --git a/docs/guide/FAQ.rst b/docs/errors/errors.rst
similarity index 56%
rename from docs/guide/FAQ.rst
rename to docs/errors/errors.rst
index 92ac758e..576e553d 100644
--- a/docs/guide/FAQ.rst
+++ b/docs/errors/errors.rst
@@ -1,14 +1,5 @@
-FAQ
-===
-
-How do I...
------------
-
-... continue to train a model that reached a stopping condition?
-    There will be an answer here.
-
-1. Reload the model trained with version 0.3.3 to the code in 0.4.
-   check out the migration note at :ref:`migration_note`.
+Errors
+======
 
 Common errors
 -------------
diff --git a/docs/faq/FAQ.rst b/docs/faq/FAQ.rst
new file mode 100644
index 00000000..411e77c1
--- /dev/null
+++ b/docs/faq/FAQ.rst
@@ -0,0 +1,14 @@
+FAQ
+===
+
+How do I...
+-----------
+
+... continue to train a model that reached a stopping condition?
+    There will be an answer here.
+
+1. Reload the model trained with version 0.3.3 to the code in 0.4.
+   check out the migration note at :ref:`migration_note`.
+
+2. Specify my dataset for `nequip-train` and `nequip-evaluate`, see :ref:`dataset_note`.
+
diff --git a/docs/guide/guide.rst b/docs/guide/guide.rst
deleted file mode 100644
index 6def3859..00000000
--- a/docs/guide/guide.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-NequIP User Guide
-=================
-
- .. toctree::
-
-    intro
-    irreps
-    conventions
-    FAQ
\ No newline at end of file
diff --git a/docs/guide/intro.rst b/docs/guide/intro.rst
deleted file mode 100644
index 7afa4132..00000000
--- a/docs/guide/intro.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-Tutorial: Introduction to NequIP
-================================
-
-TODO
\ No newline at end of file
diff --git a/docs/guide/irreps.rst b/docs/guide/irreps.rst
deleted file mode 100644
index 5f9b2735..00000000
--- a/docs/guide/irreps.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-Irreps
-======
-
-.. _Irreps:
-
-Syntax to specify irreps
-------------------------
-
-TODO: descripe irreps syntax here
\ No newline at end of file
diff --git a/docs/guide/conventions.rst b/docs/howto/conventions.rst
similarity index 100%
rename from docs/guide/conventions.rst
rename to docs/howto/conventions.rst
diff --git a/docs/howto/dataset.rst b/docs/howto/dataset.rst
new file mode 100644
index 00000000..2b5267e7
--- /dev/null
+++ b/docs/howto/dataset.rst
@@ -0,0 +1,156 @@
+.. _dataset_note:
+   
+How to prepare training dataset
+===============================
+
+What NequIP does behind the scenes
+----------------------------------
+
+NequIP uses the ``AtomicDataset`` class to store atomic configurations.
+During the initialization of an ``AtomicDataset`` object,
+NequIP reads the atomic structures from the dataset and
+computes the neighbor lists and other data structures needed for the GNN
+by converting the raw data to a list of ``AtomicData`` objects.
+
+The computed results are then cached on disk in the ``root/processed_hashkey`` folder.
+The hash is based on all the metadata provided for the dataset,
+which includes the file name, the cutoff radius, the floating-point precision, etc.
+When multiple training/evaluation runs use the same dataset,
+the neighbor lists will only be computed in the first NequIP run.
+Later runs will load the ``AtomicDataset`` object directly from the cache to save computation time.
+
+Note: be careful with the cached files. If you update your raw data file but keep using the same filename,
+NequIP will not automatically update the cached data.
+
+Key concepts
+------------
+
+fixed_fields
+~~~~~~~~~~~~
+Fixed fields are quantities that are shared among all the configurations in the dataset.
+For example, if the dataset is a trajectory of an NVT MD simulation, the supercell size and the atomic species
+are a constant matrix/vector throughout the whole dataset.
+In this case, instead of repeating the same values many times,
+we specify the cell and species as fixed fields and provide them only once.
+
+yaml interface
+~~~~~~~~~~~~~~
+``nequip-train`` and ``nequip-evaluate`` automatically construct the AtomicDataset based on the yaml arguments.
+Later sections offer a couple different examples.
+
+If the training and validation datasets are from different raw files, the arguments for each set
+can be defined with ``dataset`` prefix and ``validation_dataset`` prefix, respectively.
+
+For example, ``dataset_file_name`` is used for training data and ``validation_dataset_file_name`` is for validation data.
+
+Python interface
+~~~~~~~~~~~~~~~~
+See ``nequip.data.dataset.AtomicInMemoryDataset``.
+
+Prepare dataset and specify in yaml config
+------------------------------------------
+
+ASE format
+~~~~~~~~~~
+
+NequIP accepts all formats that can be parsed by the `ase.io.read` function.
+We recommend `extxyz`.
+
+Example: Given an atomic data stored in "H2.extxyz" that looks like below:
+
+.. code:: extxyz
+
+   2
+   Properties=species:S:1:pos:R:3 energy=-10 user_label=2.0 pbc="F F F"
+   H       0.00000000       0.00000000       0.00000000
+   H       0.00000000       0.00000000       1.02000000
+
+The yaml input should be
+
+.. code:: yaml
+
+   dataset: ase
+   dataset_file_name: H2.extxyz
+   ase_args:
+     format: extxyz
+   include_keys:
+     - user_label
+   key_mapping:
+     user_label: label0
+   chemical_symbol_to_type:
+     H: 0
+
+For formats other than `extxyz`, be careful with the ase parsers; they may behave differently from the extxyz parser.
+For example, the ase vasp parser stores the potential energy in `free_energy` instead of `energy`.
+Because our code is optimized for the `extxyz` parser, NequIP will not be able to load any `total_energy` labels in that case.
+We need some additional keys to help NequIP understand the situation.
+Here's an example for a VASP OUTCAR file.
+
+.. code:: yaml
+
+   dataset: ase
+   dataset_file_name: OUTCAR
+   ase_args:
+     format: vasp-out
+   key_mapping:
+     free_energy: total_energy
+   chemical_symbol_to_type:
+     H: 0
+
+The workaround is to use key mapping; see the note on key mapping below.
+
+NPZ format
+~~~~~~~~~~
+
+If your dataset consists of configurations that always have the same number of atoms, the npz data format can be an option.
+
+In the npz file, every array should have as many rows as there are configurations.
+For example, the force array of 36 atomic configurations of an N-atom system should have the shape (36, N, 3);
+their total_energy array should have the shape (36).
+
+Below is an example of the yaml specification.
+
+.. code:: yaml
+
+   dataset: npz
+   dataset_file_name: example.npz
+   include_keys:
+     - user_label1
+     - user_label2
+   npz_fixed_field_keys:
+     - cell
+     - atomic_numbers
+   key_mapping:
+     position: pos
+     force: forces
+     energy: total_energy
+     Z: atomic_numbers
+
+
+Note on key mapping
+~~~~~~~~~~~~~~~~~~~
+
+NequIP has default key names for energy, force, and cell (defined in ``nequip.data._keys``).
+Unlike in the ASE format, where this information is parsed automatically,
+in the npz data format the correct key names have to be provided.
+The common key names are: `total_energy`, `forces`, `atomic_numbers`, `pos`, `cell`, `pbc`.
+The ``key_mapping`` option converts a user-defined name (key) to the NequIP default name (value).
+
+
+Advanced options
+----------------
+
+skip frames during data processing
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The `include_frames` argument can be specified in yaml to skip certain frames in the raw data file.
+The item has to be a list or a Python iterable object.
+
+register user-defined graph, node, edge fields
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Graph, node, and edge fields are quantities that belong to
+the whole graph, each atom, and each edge, respectively.
+Example graph fields include cell, pbc, and total_energy.
+Example node fields include pos and forces.
+
+To help NequIP properly assemble the batch data, graph quantities other than
+cell, pbc, and total_energy should be registered.
diff --git a/docs/howto/howto.rst b/docs/howto/howto.rst
new file mode 100644
index 00000000..07e84e84
--- /dev/null
+++ b/docs/howto/howto.rst
@@ -0,0 +1,7 @@
+How-to Tutorials
+================
+
+ .. toctree::
+
+    dataset
+    migrate
diff --git a/docs/guide/migrate.rst b/docs/howto/migrate.rst
similarity index 100%
rename from docs/guide/migrate.rst
rename to docs/howto/migrate.rst
diff --git a/docs/index.rst b/docs/index.rst
index dc6ecd43..d2edd1a6 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -9,12 +9,20 @@ NequIP
 NequIP is an open-source package for creating, training, and using E(3)-equivariant machine learning interatomic potentials.
 
 .. toctree::
-   :maxdepth: 3
+   :maxdepth: 2
    :caption: Contents:
 
-   guide/guide
+   introduction/intro
+   cite
+   installation/install
+   yaml/yaml
+   howto/howto
+   faq/FAQ
+   commandline/commands
+   lammps/all
    options/options
    api/nequip
+   errors/errors
 
 
 
diff --git a/docs/installation/install.rst b/docs/installation/install.rst
new file mode 100644
index 00000000..3e946815
--- /dev/null
+++ b/docs/installation/install.rst
@@ -0,0 +1,39 @@
+Installation
+============
+
+NequIP requires:
+
+ * Python >= 3.6
+ * PyTorch >= 1.8, <=1.11.*. PyTorch can be installed following the `instructions from their documentation <https://pytorch.org/get-started/locally/>`_. Note that neither ``torchvision`` nor ``torchaudio``, included in the default install command, are needed for NequIP.
+
+To install:
+
+ * We use `Weights&Biases <https://wandb.ai>`_ to keep track of experiments. This is not a strict requirement — you can use our package without it — but it may make your life easier. If you want to use it, create an account `here <https://wandb.ai/login?signup=true>`_ and install the Python package::
+
+    pip install wandb
+
+ * Install the latest stable NequIP::
+
+    pip install https://github.com/mir-group/nequip/archive/main.zip
+
+To install previous versions of NequIP, please clone the repository from GitHub and check out the appropriate tag (for example ``v0.3.3`` for version 0.3.3).
+
+To install the current **unstable** development version of NequIP, please clone our repository and check out the ``develop`` branch.
+
+Installation Issues
+-------------------
+
+The easiest way to check if your installation is working is to train a *toy* model::
+
+    nequip-train configs/minimal.yaml
+
+If you suspect something is wrong, encounter errors, or just want to confirm that everything is in working order, you can also run the unit tests::
+
+    pip install pytest
+    pytest tests/unit/
+
+To run the full tests, including a set of longer/more intensive integration tests, run::
+
+    pytest tests/
+
+If a GPU is present, the unit tests will use it.
\ No newline at end of file
diff --git a/docs/introduction/intro.rst b/docs/introduction/intro.rst
new file mode 100644
index 00000000..e0dcc32c
--- /dev/null
+++ b/docs/introduction/intro.rst
@@ -0,0 +1,4 @@
+Overview
+========
+
+TODO
diff --git a/docs/lammps/all.rst b/docs/lammps/all.rst
new file mode 100644
index 00000000..9faac07e
--- /dev/null
+++ b/docs/lammps/all.rst
@@ -0,0 +1,7 @@
+Integration to LAMMPS, ASE
+==========================
+
+ .. toctree::
+
+    lammps
+    ase
diff --git a/docs/lammps/ase.rst b/docs/lammps/ase.rst
new file mode 100644
index 00000000..3729cde3
--- /dev/null
+++ b/docs/lammps/ase.rst
@@ -0,0 +1,2 @@
+ASE
+===
diff --git a/docs/lammps/lammps.rst b/docs/lammps/lammps.rst
new file mode 100644
index 00000000..f9d0ba9f
--- /dev/null
+++ b/docs/lammps/lammps.rst
@@ -0,0 +1,2 @@
+LAMMPS
+======
diff --git a/docs/options/dataset.rst b/docs/options/dataset.rst
index 54b39fc9..f3ca194c 100644
--- a/docs/options/dataset.rst
+++ b/docs/options/dataset.rst
@@ -33,7 +33,7 @@ key_mapping
     | Type: dict
     | Default: ``{'positions': 'pos', 'energy': 'total_energy', 'force': 'forces', 'forces': 'forces', 'Z': 'atomic_numbers', 'atomic_number': 'atomic_numbers'}``
 
-npz_keys
+include_keys
 ^^^^^^^^
     | Type: list
     | Default: ``[]``
@@ -68,5 +68,11 @@ include_frames
     | Type: NoneType
     | Default: ``None``
 
+ase_args
+^^^^^^^^
+    | Type: dict
+    | Default: ``{}``
+
 Advanced
---------
\ No newline at end of file
+--------
+See the tutorial at :ref:`dataset_note`.
diff --git a/docs/yaml/yaml.rst b/docs/yaml/yaml.rst
new file mode 100644
index 00000000..fd804436
--- /dev/null
+++ b/docs/yaml/yaml.rst
@@ -0,0 +1,4 @@
+YAML input
+==========
+
+TODO
diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py
index 2b2279d9..c38b8eae 100644
--- a/nequip/data/dataset.py
+++ b/nequip/data/dataset.py
@@ -641,7 +641,7 @@ class NpzDataset(AtomicInMemoryDataset):
     """Load data from an npz file.
 
     To avoid loading unneeded data, keys are ignored by default unless they are in ``key_mapping``, ``include_keys``,
-    ``npz_fixed_fields`` or ``extra_fixed_fields``.
+    ``npz_fixed_field_keys`` or ``extra_fixed_fields``.
 
     Args:
         key_mapping (Dict[str, str]): mapping of npz keys to ``AtomicData`` keys. Optional
diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py
index 7aafd5fc..4d7686e1 100644
--- a/nequip/scripts/deploy.py
+++ b/nequip/scripts/deploy.py
@@ -129,7 +129,7 @@ def load_deployed_model(
 
 def main(args=None):
     parser = argparse.ArgumentParser(
-        description="Create and view information about deployed NequIP potentials."
+        description="Deploy and view information about previously deployed NequIP models."
     )
     # backward compat for 3.6
     if sys.version_info[1] > 6:
diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py
index 7c4e2abc..f7dfa12b 100644
--- a/nequip/scripts/evaluate.py
+++ b/nequip/scripts/evaluate.py
@@ -30,13 +30,13 @@ def main(args=None, running_as_script: bool = True):
         description=textwrap.dedent(
             """Compute the error of a model on a test set using various metrics.
 
-            The model, metrics, dataset, etc. can specified individually, or a training session can be indicated with `--train-dir`.
+            The model, metrics, dataset, etc. can specified in individual YAML config files, or a training session can be indicated with `--train-dir`.
             In order of priority, the global settings (dtype, TensorFloat32, etc.) are taken from:
-              1. The model config (for a training session)
-              2. The dataset config (for a deployed model)
-              3. The defaults
+              (1) the model config (for a training session),
+              (2) the dataset config (for a deployed model),
+              or (3) the defaults.
 
-            Prints only the final result in `name = num` format to stdout; all other information is logging.debuged to stderr.
+            Prints only the final result in `name = num` format to stdout; all other information is `logging.debug`ed to stderr.
 
             WARNING: Please note that results of CUDA models are rarely exactly reproducible, and that even CPU models can be nondeterministic.
             """
diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py
index c6aa7785..88b55f7e 100644
--- a/nequip/scripts/train.py
+++ b/nequip/scripts/train.py
@@ -81,8 +81,12 @@ def main(args=None, running_as_script: bool = True):
 
 
 def parse_command_line(args=None):
-    parser = argparse.ArgumentParser(description="Train a NequIP model.")
-    parser.add_argument("config", help="configuration file")
+    parser = argparse.ArgumentParser(
+        description="Train (or restart training of) a NequIP model."
+    )
+    parser.add_argument(
+        "config", help="YAML file configuring the model, dataset, and other options"
+    )
     parser.add_argument(
         "--equivariance-test",
         help="test the model's equivariance before training on n (default 1) random frames from the dataset",

From 4a7fb109e16b04633dd16234b4cc3eb137a2c75a Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Wed, 7 Dec 2022 14:24:44 -0500
Subject: [PATCH 35/50] Revert "remove sklearn dependence (#248)"

This reverts commit 64df5e52e269fadc1480ae78c4d1682b59bb8486.
---
 CHANGELOG.md                       |   1 -
 README.md                          |   2 +-
 configs/full.yaml                  |   4 +-
 nequip/utils/regressor.py          | 235 +++++++++++++++++++++--------
 nequip/utils/unittests/conftest.py |  17 ---
 setup.py                           |   1 +
 tests/unit/data/test_dataset.py    |  32 ++--
 tests/unit/utils/test_gp.py        |  37 +++++
 tests/unit/utils/test_solver.py    |  38 -----
 9 files changed, 235 insertions(+), 132 deletions(-)
 create mode 100644 tests/unit/utils/test_gp.py
 delete mode 100644 tests/unit/utils/test_solver.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 67bd0c81..50338467 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,6 @@ Most recent change on the bottom.
 
 ## [Unreleased] - 0.5.6
 ### Added
-- sklearn dependency removed
 - `nequip-benchmark` and `nequip-train` report number of weights and number of trainable weights
 - `nequip-benchmark --no-compile` and `--verbose` and `--memory-summary`
 - `nequip-benchmark --pdb` for debugging model (builder) errors
diff --git a/README.md b/README.md
index da741c09..f70840b8 100644
--- a/README.md
+++ b/README.md
@@ -157,6 +157,6 @@ under the guidance of [Boris Kozinsky at Harvard](https://bkoz.seas.harvard.edu/
 If you have questions, please don't hesitate to reach out at batzner[at]g[dot]harvard[dot]edu. 
 
 If you find a bug or have a proposal for a feature, please post it in the [Issues](https://github.com/mir-group/nequip/issues).
-If you have a question, topic, or issue that isn't obviously one of those, try our [GitHub Discussions](https://github.com/mir-group/nequip/discussions).
+If you have a question, topic, or issue that isn't obviously one of those, try our [GitHub Discussions](https://github.com/mir-group/nequip/discussions).
 
 If you want to contribute to the code, please read [`CONTRIBUTING.md`](CONTRIBUTING.md).
diff --git a/configs/full.yaml b/configs/full.yaml
index 2f98164e..daefc143 100644
--- a/configs/full.yaml
+++ b/configs/full.yaml
@@ -317,10 +317,10 @@ per_species_rescale_scales: dataset_forces_rms
 # If not provided, defaults to dataset_per_species_force_rms or dataset_per_atom_total_energy_std, depending on whether forces are being trained.
 # per_species_rescale_kwargs: 
 #   total_energy: 
-#     alpha: 0.001
+#     alpha: 0.1
 #     max_iteration: 20
 #     stride: 100
-# keywords for ridge regression decomposition of per specie energy. Optional. Defaults to 0.001. The value should be in the range of 1e-3 to 1e-2
+# keywords for GP decomposition of per-species energy. Optional. Defaults to 0.1
 # per_species_rescale_arguments_in_dataset_units: True
 # if explicit numbers are given for the shifts/scales, this parameter must specify whether the given numbers are unitless shifts/scales or are in the units of the dataset. If ``True``, any global rescalings will correctly be applied to the per-species values.
 
diff --git a/nequip/utils/regressor.py b/nequip/utils/regressor.py
index 30c8f9ab..3d23cf84 100644
--- a/nequip/utils/regressor.py
+++ b/nequip/utils/regressor.py
@@ -1,72 +1,181 @@
 import logging
 import torch
+import numpy as np
+from typing import Optional
+from sklearn.gaussian_process import GaussianProcessRegressor
+from sklearn.gaussian_process.kernels import DotProduct, Kernel, Hyperparameter
 
-from torch import matmul
-from torch.linalg import solve, inv
-from typing import Optional, Sequence
-from opt_einsum import contract
 
+def solver(X, y, regressor: Optional[str] = "NormalizedGaussianProcess", **kwargs):
+    if regressor == "GaussianProcess":
+        return gp(X, y, **kwargs)
+    elif regressor == "NormalizedGaussianProcess":
+        return normalized_gp(X, y, **kwargs)
+    else:
+        raise NotImplementedError(f"{regressor} is not implemented")
 
-def solver(X, y, alpha: Optional[float] = 0.001, stride: Optional[int] = 1, **kwargs):
-
-    dtype = torch.get_default_dtype()
-    X = X[::stride].to(dtype)
-    y = y[::stride].to(dtype)
-
-    X, y = down_sampling_by_composition(X, y)
-
-    X_norm = torch.sum(X)
-
-    X = X / X_norm
-    y = y / X_norm
 
+def normalized_gp(X, y, **kwargs):
+    feature_rms = 1.0 / np.sqrt(np.average(X**2, axis=0))
+    feature_rms = np.nan_to_num(feature_rms, 1)
     y_mean = torch.sum(y) / torch.sum(X)
-
-    feature_rms = torch.sqrt(torch.mean(X**2, axis=0))
-
-    alpha_mat = torch.diag(feature_rms) * alpha * alpha
-
-    A = matmul(X.T, X) + alpha_mat
-    dy = y - (torch.sum(X, axis=1, keepdim=True) * y_mean).reshape(y.shape)
-    Xy = matmul(X.T, dy)
-
-    mean = solve(A, Xy)
-
-    sigma2 = torch.var(matmul(X, mean) - dy)
-    Ainv = inv(A)
-    cov = torch.sqrt(sigma2 * contract("ij,kj,kl,li->i", Ainv, X, X, Ainv))
-
-    mean = mean + y_mean.reshape([-1])
-
-    logging.debug(f"Ridge Regression, residue {sigma2}")
-
-    return mean, cov
-
-
-def down_sampling_by_composition(
-    X: torch.Tensor, y: torch.Tensor, percentage: Sequence = [0.25, 0.5, 0.75]
+    mean, std = base_gp(
+        X,
+        y - (torch.sum(X, axis=1) * y_mean).reshape(y.shape),
+        NormalizedDotProduct,
+        {"diagonal_elements": feature_rms},
+        **kwargs,
+    )
+    return mean + y_mean, std
+
+
+def gp(X, y, **kwargs):
+    return base_gp(
+        X, y, DotProduct, {"sigma_0": 0, "sigma_0_bounds": "fixed"}, **kwargs
+    )
+
+
+def base_gp(
+    X,
+    y,
+    kernel,
+    kernel_kwargs,
+    alpha: Optional[float] = 0.1,
+    max_iteration: int = 20,
+    stride: Optional[int] = None,
 ):
 
-    unique_comps, comp_ids = torch.unique(X, dim=0, return_inverse=True)
-
-    n_types = torch.max(comp_ids) + 1
-
-    sort_by = torch.argsort(comp_ids)
-
-    # find out the block for each composition
-    d_icomp = comp_ids[sort_by]
-    d_icomp = d_icomp[:-1] - d_icomp[1:]
-    node_icomp = torch.where(d_icomp != 0)[0]
-    id_start = torch.cat((torch.as_tensor([0]), node_icomp + 1))
-    id_end = torch.cat((node_icomp + 1, torch.as_tensor([len(sort_by)])))
-
-    n_points = len(percentage)
-    new_X = torch.zeros((n_types * n_points, X.shape[1]))
-    new_y = torch.zeros((n_types * n_points))
-    for i in range(n_types):
-        ids = sort_by[id_start[i] : id_end[i]]
-        for j, p in enumerate(percentage):
-            new_y[i * n_points + j] = torch.quantile(y[ids], p, interpolation="linear")
-            new_X[i * n_points + j] = unique_comps[i]
-
-    return new_X, new_y
+    if len(y.shape) == 1:
+        y = y.reshape([-1, 1])
+
+    if stride is not None:
+        X = X[::stride]
+        y = y[::stride]
+
+    not_fit = True
+    iteration = 0
+    mean = None
+    std = None
+    while not_fit:
+        logging.debug(f"GP fitting iteration {iteration} {alpha}")
+        try:
+            _kernel = kernel(**kernel_kwargs)
+            gpr = GaussianProcessRegressor(kernel=_kernel, random_state=0, alpha=alpha)
+            gpr = gpr.fit(X, y)
+
+            vec = torch.diag(torch.ones(X.shape[1]))
+            mean, std = gpr.predict(vec, return_std=True)
+
+            mean = torch.as_tensor(mean, dtype=torch.get_default_dtype()).reshape([-1])
+            # ignore all the off-diagonal terms
+            std = torch.as_tensor(std, dtype=torch.get_default_dtype()).reshape([-1])
+            likelihood = gpr.log_marginal_likelihood()
+
+            res = torch.sqrt(
+                torch.square(torch.matmul(X, mean.reshape([-1, 1])) - y).mean()
+            )
+
+            logging.debug(
+                f"GP fitting: alpha {alpha}:\n"
+                f"            residue {res}\n"
+                f"            mean {mean} std {std}\n"
+                f"            log marginal likelihood {likelihood}"
+            )
+            not_fit = False
+
+        except Exception as e:
+            logging.info(f"GP fitting failed for alpha={alpha} and {e.args}")
+            if alpha == 0 or alpha is None:
+                logging.info("try a non-zero alpha")
+                not_fit = False
+                raise ValueError(
+                    f"Please set the {alpha} to non-zero value. \n"
+                    "The dataset energy is rank deficient to be solved with GP"
+                )
+            else:
+                alpha = alpha * 2
+                iteration += 1
+                logging.debug(f"           increase alpha to {alpha}")
+
+            if iteration >= max_iteration or not_fit is False:
+                raise ValueError(
+                    "Please set the per species shift and scale to zeros and ones. \n"
+                    "The dataset energy is to diverge to be solved with GP"
+                )
+
+    return mean, std
+
+
+class NormalizedDotProduct(Kernel):
+    r"""Dot-Product kernel.
+    .. math::
+        k(x_i, x_j) = x_i \cdot A \cdot x_j
+    """
+
+    def __init__(self, diagonal_elements):
+        # TO DO: check shape
+        self.diagonal_elements = diagonal_elements
+        self.A = np.diag(diagonal_elements)
+
+    def __call__(self, X, Y=None, eval_gradient=False):
+        """Return the kernel k(X, Y) and optionally its gradient.
+        Parameters
+        ----------
+        X : ndarray of shape (n_samples_X, n_features)
+            Left argument of the returned kernel k(X, Y)
+        Y : ndarray of shape (n_samples_Y, n_features), default=None
+            Right argument of the returned kernel k(X, Y). If None, k(X, X)
+            is evaluated instead.
+        eval_gradient : bool, default=False
+            Determines whether the gradient with respect to the log of
+            the kernel hyperparameter is computed.
+            Only supported when Y is None.
+        Returns
+        -------
+        K : ndarray of shape (n_samples_X, n_samples_Y)
+            Kernel k(X, Y)
+        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
+                optional
+            The gradient of the kernel k(X, X) with respect to the log of the
+            hyperparameter of the kernel. Only returned when `eval_gradient`
+            is True.
+        """
+        X = np.atleast_2d(X)
+        if Y is None:
+            K = (X.dot(self.A)).dot(X.T)
+        else:
+            if eval_gradient:
+                raise ValueError("Gradient can only be evaluated when Y is None.")
+            K = (X.dot(self.A)).dot(Y.T)
+
+        if eval_gradient:
+            return K, np.empty((X.shape[0], X.shape[0], 0))
+        else:
+            return K
+
+    def diag(self, X):
+        """Returns the diagonal of the kernel k(X, X).
+        The result of this method is identical to np.diag(self(X)); however,
+        it can be evaluated more efficiently since only the diagonal is
+        evaluated.
+        Parameters
+        ----------
+        X : ndarray of shape (n_samples_X, n_features)
+            Left argument of the returned kernel k(X, Y).
+        Returns
+        -------
+        K_diag : ndarray of shape (n_samples_X,)
+            Diagonal of kernel k(X, X).
+        """
+        return np.einsum("ij,ij,jj->i", X, X, self.A)
+
+    def __repr__(self):
+        return ""
+
+    def is_stationary(self):
+        """Returns whether the kernel is stationary."""
+        return False
+
+    @property
+    def hyperparameter_diagonal_elements(self):
+        return Hyperparameter("diagonal_elements", "numeric", "fixed")
diff --git a/nequip/utils/unittests/conftest.py b/nequip/utils/unittests/conftest.py
index 77a91930..060e5e7b 100644
--- a/nequip/utils/unittests/conftest.py
+++ b/nequip/utils/unittests/conftest.py
@@ -133,22 +133,5 @@ def atomic_batch(nequip_dataset):
     return Batch.from_data_list([nequip_dataset[0], nequip_dataset[1]])
 
 
-@pytest.fixture(scope="function")
-def per_species_set():
-    dtype = torch.get_default_dtype()
-    torch.manual_seed(0)
-    mean_min = 1
-    mean_max = 100
-    std = 20
-    n_sample = 1000
-    n_species = 9
-    ref_mean = torch.rand((n_species)) * (mean_max - mean_min) + mean_min
-    t_mean = torch.ones((n_sample, 1)) * ref_mean.reshape([1, -1])
-    ref_std = torch.rand((n_species)) * std
-    t_std = torch.ones((n_sample, 1)) * ref_std.reshape([1, -1])
-    E = torch.normal(t_mean, t_std)
-    return ref_mean.to(dtype), ref_std.to(dtype), E.to(dtype), n_sample, n_species
-
-
 # Use debug mode
 set_irreps_debug(True)
diff --git a/setup.py b/setup.py
index cba6b51f..8c977e0a 100644
--- a/setup.py
+++ b/setup.py
@@ -37,6 +37,7 @@
         "typing_extensions;python_version<'3.8'",  # backport of Final
         "torch-runstats>=0.2.0",
         "torch-ema>=0.3.0",
+        "scikit_learn<=1.0.1",  # for GaussianProcess for per-species statistics; 1.0.2 has a bug!
     ],
     zip_safe=True,
 )
diff --git a/tests/unit/data/test_dataset.py b/tests/unit/data/test_dataset.py
index bad796c3..f45e0ca8 100644
--- a/tests/unit/data/test_dataset.py
+++ b/tests/unit/data/test_dataset.py
@@ -31,7 +31,7 @@ def ase_file(molecules):
 
 
 MAX_ATOMIC_NUMBER: int = 5
-NATOMS = 10
+NATOMS = 3
 
 
 @pytest.fixture(scope="function")
@@ -277,11 +277,16 @@ def test_per_node_field(self, npz_dataset, fixed_field, mode, subset):
         )
         print(result)
 
-    @pytest.mark.parametrize("alpha", [0, 1e-3, 0.01])
+    @pytest.mark.parametrize("alpha", [1e-5, 1e-3, 0.1, 0.5])
     @pytest.mark.parametrize("fixed_field", [True, False])
     @pytest.mark.parametrize("full_rank", [True, False])
     @pytest.mark.parametrize("subset", [True, False])
-    def test_per_graph_field(self, npz_dataset, alpha, fixed_field, full_rank, subset):
+    @pytest.mark.parametrize(
+        "regressor", ["NormalizedGaussianProcess", "GaussianProcess"]
+    )
+    def test_per_graph_field(
+        self, npz_dataset, alpha, fixed_field, full_rank, regressor, subset
+    ):
 
         if alpha <= 1e-4 and not full_rank:
             return
@@ -303,7 +308,10 @@ def test_per_graph_field(self, npz_dataset, alpha, fixed_field, full_rank, subse
         del n_spec
         del Ns
 
-        ref_mean, ref_std, E = generate_E(N, 100, 1000, 10)
+        if alpha == 1e-5:
+            ref_mean, ref_std, E = generate_E(N, 100, 1000, 0.0)
+        else:
+            ref_mean, ref_std, E = generate_E(N, 100, 1000, 0.5)
 
         if subset:
             E_orig_order = torch.zeros_like(
@@ -325,6 +333,7 @@ def test_per_graph_field(self, npz_dataset, alpha, fixed_field, full_rank, subse
                 AtomicDataDict.TOTAL_ENERGY_KEY
                 + "per_species_mean_std": {
                     "alpha": alpha,
+                    "regressor": regressor,
                     "stride": 1,
                 }
             },
@@ -332,18 +341,21 @@ def test_per_graph_field(self, npz_dataset, alpha, fixed_field, full_rank, subse
 
         res = torch.matmul(N, mean.reshape([-1, 1])) - E.reshape([-1, 1])
         res2 = torch.sum(torch.square(res))
-        print("alpha, residue, actual residue", alpha, res2, ref_res2)
+        print("residue", alpha, res2 - ref_res2)
         print("mean", mean, ref_mean)
         print("diff in mean", mean - ref_mean)
         print("std", std, ref_std)
 
-        tolerance = torch.max(ref_std) * 4
         if full_rank:
-            assert torch.allclose(mean, ref_mean, atol=tolerance)
-            # assert torch.allclose(std, torch.zeros_like(ref_mean), atol=alpha * 100)
+            if alpha == 1e-5:
+                assert torch.allclose(mean, ref_mean, rtol=1e-1)
+            else:
+                assert torch.allclose(mean, ref_mean, rtol=1)
+                assert torch.allclose(std, torch.zeros_like(ref_mean), atol=alpha * 100)
+        elif regressor == "NormalizedGaussianProcess":
+            assert torch.std(mean).numpy() == 0
         else:
-            assert torch.allclose(mean, mean[0], atol=tolerance)
-            # assert torch.std(mean).numpy() == 0
+            assert mean[0] == mean[1] * 2
 
 
 class TestReload:
diff --git a/tests/unit/utils/test_gp.py b/tests/unit/utils/test_gp.py
new file mode 100644
index 00000000..4792b9d2
--- /dev/null
+++ b/tests/unit/utils/test_gp.py
@@ -0,0 +1,37 @@
+import torch
+import pytest
+
+from nequip.utils.regressor import base_gp
+from sklearn.gaussian_process.kernels import DotProduct
+
+
+# @pytest.mark.parametrize("full_rank", [True, False])
+@pytest.mark.parametrize("full_rank", [False])
+@pytest.mark.parametrize("alpha", [0, 1e-3, 0.1, 1])
+def test_random(full_rank, alpha):
+
+    if alpha == 0 and not full_rank:
+        return
+
+    torch.manual_seed(0)
+    n_samples = 10
+    n_dim = 3
+
+    if full_rank:
+        X = torch.randint(low=1, high=10, size=(n_samples, n_dim))
+    else:
+        X = torch.randint(low=1, high=10, size=(n_samples, 1)) * torch.ones(
+            (n_samples, n_dim)
+        )
+
+    ref_mean = torch.rand((n_dim, 1))
+    y = torch.matmul(X, ref_mean)
+
+    mean, std = base_gp(
+        X, y, DotProduct, {"sigma_0": 0, "sigma_0_bounds": "fixed"}, alpha=0.1
+    )
+
+    if full_rank:
+        assert torch.allclose(ref_mean, mean, rtol=0.5)
+    else:
+        assert torch.allclose(mean, mean[0], rtol=1e-3)
diff --git a/tests/unit/utils/test_solver.py b/tests/unit/utils/test_solver.py
deleted file mode 100644
index 049c897d..00000000
--- a/tests/unit/utils/test_solver.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import torch
-import pytest
-
-from nequip.utils.regressor import solver
-
-
-@pytest.mark.parametrize("full_rank", [True, False])
-@pytest.mark.parametrize("alpha", [0, 1e-3, 1e-2])
-def test_random(full_rank, alpha, per_species_set):
-
-    if alpha == 0 and not full_rank:
-        return
-
-    torch.manual_seed(0)
-
-    ref_mean, ref_std, E, n_samples, n_dim = per_species_set
-
-    dtype = torch.get_default_dtype()
-
-    X = torch.randint(low=1, high=10, size=(n_samples, n_dim)).to(dtype)
-    if not full_rank:
-        X[:, n_dim - 2] = X[:, n_dim - 1] * 2
-    y = (X * E).sum(axis=-1)
-
-    mean, std = solver(X, y, alpha=alpha)
-
-    tolerance = torch.max(ref_std)
-
-    print("tolerance", tolerance)
-    print("solution", mean, std)
-    print("diff", mean - ref_mean)
-
-    if full_rank:
-        assert torch.allclose(ref_mean, mean, atol=tolerance)
-    else:
-        assert torch.allclose(mean[n_dim - 1], mean[n_dim - 2], atol=tolerance)
-
-    assert torch.max(std) < tolerance

From 332947f49a3f0b8d5e905e10d00def719dbfe19e Mon Sep 17 00:00:00 2001
From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Thu, 8 Dec 2022 05:19:02 -0500
Subject: [PATCH 36/50] Remove sklearn (recreated PR) (#277)

* change solver

* remove dependency on dataset

* add ridge tests

* swap to ridge

* add down sampling

* change to coef

* change to torch.solve

* black

* fix correlated columns

* fix sqrt error

* black

* black

* black

* add normalization

* black

* flake8

* change explanation

* add debug log

* Update tests/conftest.py

Co-authored-by: Alby M. <1473644+Linux-cpp-lisp@users.noreply.github.com>

* Update tests/conftest.py

Co-authored-by: Alby M. <1473644+Linux-cpp-lisp@users.noreply.github.com>

* Fix typo in README (#270)

* update change log

* Update tests/conftest.py

Co-authored-by: Alby M. <1473644+Linux-cpp-lisp@users.noreply.github.com>

* Update tests/conftest.py

Co-authored-by: Alby M. <1473644+Linux-cpp-lisp@users.noreply.github.com>

Co-authored-by: Lixin Sun <lixinsun@microsoft.com>
Co-authored-by: Lixin Sun <nw13mifaso@gmail.com>
Co-authored-by: Simon Batzner <simonbatzner@gmail.com>
---
 CHANGELOG.md                       |   1 +
 README.md                          |   2 +-
 configs/full.yaml                  |   4 +-
 nequip/utils/regressor.py          | 235 ++++++++---------------------
 nequip/utils/unittests/conftest.py |  17 +++
 setup.py                           |   1 -
 tests/unit/data/test_dataset.py    |  32 ++--
 tests/unit/utils/test_gp.py        |  37 -----
 tests/unit/utils/test_solver.py    |  38 +++++
 9 files changed, 132 insertions(+), 235 deletions(-)
 delete mode 100644 tests/unit/utils/test_gp.py
 create mode 100644 tests/unit/utils/test_solver.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 50338467..67bd0c81 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ Most recent change on the bottom.
 
 ## [Unreleased] - 0.5.6
 ### Added
+- sklearn dependency removed
 - `nequip-benchmark` and `nequip-train` report number of weights and number of trainable weights
 - `nequip-benchmark --no-compile` and `--verbose` and `--memory-summary`
 - `nequip-benchmark --pdb` for debugging model (builder) errors
diff --git a/README.md b/README.md
index f70840b8..da741c09 100644
--- a/README.md
+++ b/README.md
@@ -157,6 +157,6 @@ under the guidance of [Boris Kozinsky at Harvard](https://bkoz.seas.harvard.edu/
 If you have questions, please don't hesitate to reach out at batzner[at]g[dot]harvard[dot]edu. 
 
 If you find a bug or have a proposal for a feature, please post it in the [Issues](https://github.com/mir-group/nequip/issues).
-If you have a question, topic, or issue that isn't obviously one of those, try our [GitHub Disucssions](https://github.com/mir-group/nequip/discussions).
+If you have a question, topic, or issue that isn't obviously one of those, try our [GitHub Discussions](https://github.com/mir-group/nequip/discussions).
 
 If you want to contribute to the code, please read [`CONTRIBUTING.md`](CONTRIBUTING.md).
diff --git a/configs/full.yaml b/configs/full.yaml
index daefc143..2f98164e 100644
--- a/configs/full.yaml
+++ b/configs/full.yaml
@@ -317,10 +317,10 @@ per_species_rescale_scales: dataset_forces_rms
 # If not provided, defaults to dataset_per_species_force_rms or dataset_per_atom_total_energy_std, depending on whether forces are being trained.
 # per_species_rescale_kwargs: 
 #   total_energy: 
-#     alpha: 0.1
+#     alpha: 0.001
 #     max_iteration: 20
 #     stride: 100
-# keywords for GP decomposition of per specie energy. Optional. Defaults to 0.1
+# keywords for ridge regression decomposition of per specie energy. Optional. Defaults to 0.001. The value should be in the range of 1e-3 to 1e-2
 # per_species_rescale_arguments_in_dataset_units: True
 # if explicit numbers are given for the shifts/scales, this parameter must specify whether the given numbers are unitless shifts/scales or are in the units of the dataset. If ``True``, any global rescalings will correctly be applied to the per-species values.
 
diff --git a/nequip/utils/regressor.py b/nequip/utils/regressor.py
index 3d23cf84..30c8f9ab 100644
--- a/nequip/utils/regressor.py
+++ b/nequip/utils/regressor.py
@@ -1,181 +1,72 @@
 import logging
 import torch
-import numpy as np
-from typing import Optional
-from sklearn.gaussian_process import GaussianProcessRegressor
-from sklearn.gaussian_process.kernels import DotProduct, Kernel, Hyperparameter
 
+from torch import matmul
+from torch.linalg import solve, inv
+from typing import Optional, Sequence
+from opt_einsum import contract
 
-def solver(X, y, regressor: Optional[str] = "NormalizedGaussianProcess", **kwargs):
-    if regressor == "GaussianProcess":
-        return gp(X, y, **kwargs)
-    elif regressor == "NormalizedGaussianProcess":
-        return normalized_gp(X, y, **kwargs)
-    else:
-        raise NotImplementedError(f"{regressor} is not implemented")
 
+def solver(X, y, alpha: Optional[float] = 0.001, stride: Optional[int] = 1, **kwargs):
+
+    dtype = torch.get_default_dtype()
+    X = X[::stride].to(dtype)
+    y = y[::stride].to(dtype)
+
+    X, y = down_sampling_by_composition(X, y)
+
+    X_norm = torch.sum(X)
+
+    X = X / X_norm
+    y = y / X_norm
 
-def normalized_gp(X, y, **kwargs):
-    feature_rms = 1.0 / np.sqrt(np.average(X**2, axis=0))
-    feature_rms = np.nan_to_num(feature_rms, 1)
     y_mean = torch.sum(y) / torch.sum(X)
-    mean, std = base_gp(
-        X,
-        y - (torch.sum(X, axis=1) * y_mean).reshape(y.shape),
-        NormalizedDotProduct,
-        {"diagonal_elements": feature_rms},
-        **kwargs,
-    )
-    return mean + y_mean, std
-
-
-def gp(X, y, **kwargs):
-    return base_gp(
-        X, y, DotProduct, {"sigma_0": 0, "sigma_0_bounds": "fixed"}, **kwargs
-    )
-
-
-def base_gp(
-    X,
-    y,
-    kernel,
-    kernel_kwargs,
-    alpha: Optional[float] = 0.1,
-    max_iteration: int = 20,
-    stride: Optional[int] = None,
+
+    feature_rms = torch.sqrt(torch.mean(X**2, axis=0))
+
+    alpha_mat = torch.diag(feature_rms) * alpha * alpha
+
+    A = matmul(X.T, X) + alpha_mat
+    dy = y - (torch.sum(X, axis=1, keepdim=True) * y_mean).reshape(y.shape)
+    Xy = matmul(X.T, dy)
+
+    mean = solve(A, Xy)
+
+    sigma2 = torch.var(matmul(X, mean) - dy)
+    Ainv = inv(A)
+    cov = torch.sqrt(sigma2 * contract("ij,kj,kl,li->i", Ainv, X, X, Ainv))
+
+    mean = mean + y_mean.reshape([-1])
+
+    logging.debug(f"Ridge Regression, residue {sigma2}")
+
+    return mean, cov
+
+
+def down_sampling_by_composition(
+    X: torch.Tensor, y: torch.Tensor, percentage: Sequence = [0.25, 0.5, 0.75]
 ):
 
-    if len(y.shape) == 1:
-        y = y.reshape([-1, 1])
-
-    if stride is not None:
-        X = X[::stride]
-        y = y[::stride]
-
-    not_fit = True
-    iteration = 0
-    mean = None
-    std = None
-    while not_fit:
-        logging.debug(f"GP fitting iteration {iteration} {alpha}")
-        try:
-            _kernel = kernel(**kernel_kwargs)
-            gpr = GaussianProcessRegressor(kernel=_kernel, random_state=0, alpha=alpha)
-            gpr = gpr.fit(X, y)
-
-            vec = torch.diag(torch.ones(X.shape[1]))
-            mean, std = gpr.predict(vec, return_std=True)
-
-            mean = torch.as_tensor(mean, dtype=torch.get_default_dtype()).reshape([-1])
-            # ignore all the off-diagonal terms
-            std = torch.as_tensor(std, dtype=torch.get_default_dtype()).reshape([-1])
-            likelihood = gpr.log_marginal_likelihood()
-
-            res = torch.sqrt(
-                torch.square(torch.matmul(X, mean.reshape([-1, 1])) - y).mean()
-            )
-
-            logging.debug(
-                f"GP fitting: alpha {alpha}:\n"
-                f"            residue {res}\n"
-                f"            mean {mean} std {std}\n"
-                f"            log marginal likelihood {likelihood}"
-            )
-            not_fit = False
-
-        except Exception as e:
-            logging.info(f"GP fitting failed for alpha={alpha} and {e.args}")
-            if alpha == 0 or alpha is None:
-                logging.info("try a non-zero alpha")
-                not_fit = False
-                raise ValueError(
-                    f"Please set the {alpha} to non-zero value. \n"
-                    "The dataset energy is rank deficient to be solved with GP"
-                )
-            else:
-                alpha = alpha * 2
-                iteration += 1
-                logging.debug(f"           increase alpha to {alpha}")
-
-            if iteration >= max_iteration or not_fit is False:
-                raise ValueError(
-                    "Please set the per species shift and scale to zeros and ones. \n"
-                    "The dataset energy is to diverge to be solved with GP"
-                )
-
-    return mean, std
-
-
-class NormalizedDotProduct(Kernel):
-    r"""Dot-Product kernel.
-    .. math::
-        k(x_i, x_j) = x_i \cdot A \cdot x_j
-    """
-
-    def __init__(self, diagonal_elements):
-        # TO DO: check shape
-        self.diagonal_elements = diagonal_elements
-        self.A = np.diag(diagonal_elements)
-
-    def __call__(self, X, Y=None, eval_gradient=False):
-        """Return the kernel k(X, Y) and optionally its gradient.
-        Parameters
-        ----------
-        X : ndarray of shape (n_samples_X, n_features)
-            Left argument of the returned kernel k(X, Y)
-        Y : ndarray of shape (n_samples_Y, n_features), default=None
-            Right argument of the returned kernel k(X, Y). If None, k(X, X)
-            if evaluated instead.
-        eval_gradient : bool, default=False
-            Determines whether the gradient with respect to the log of
-            the kernel hyperparameter is computed.
-            Only supported when Y is None.
-        Returns
-        -------
-        K : ndarray of shape (n_samples_X, n_samples_Y)
-            Kernel k(X, Y)
-        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
-                optional
-            The gradient of the kernel k(X, X) with respect to the log of the
-            hyperparameter of the kernel. Only returned when `eval_gradient`
-            is True.
-        """
-        X = np.atleast_2d(X)
-        if Y is None:
-            K = (X.dot(self.A)).dot(X.T)
-        else:
-            if eval_gradient:
-                raise ValueError("Gradient can only be evaluated when Y is None.")
-            K = (X.dot(self.A)).dot(Y.T)
-
-        if eval_gradient:
-            return K, np.empty((X.shape[0], X.shape[0], 0))
-        else:
-            return K
-
-    def diag(self, X):
-        """Returns the diagonal of the kernel k(X, X).
-        The result of this method is identical to np.diag(self(X)); however,
-        it can be evaluated more efficiently since only the diagonal is
-        evaluated.
-        Parameters
-        ----------
-        X : ndarray of shape (n_samples_X, n_features)
-            Left argument of the returned kernel k(X, Y).
-        Returns
-        -------
-        K_diag : ndarray of shape (n_samples_X,)
-            Diagonal of kernel k(X, X).
-        """
-        return np.einsum("ij,ij,jj->i", X, X, self.A)
-
-    def __repr__(self):
-        return ""
-
-    def is_stationary(self):
-        """Returns whether the kernel is stationary."""
-        return False
-
-    @property
-    def hyperparameter_diagonal_elements(self):
-        return Hyperparameter("diagonal_elements", "numeric", "fixed")
+    unique_comps, comp_ids = torch.unique(X, dim=0, return_inverse=True)
+
+    n_types = torch.max(comp_ids) + 1
+
+    sort_by = torch.argsort(comp_ids)
+
+    # find out the block for each composition
+    d_icomp = comp_ids[sort_by]
+    d_icomp = d_icomp[:-1] - d_icomp[1:]
+    node_icomp = torch.where(d_icomp != 0)[0]
+    id_start = torch.cat((torch.as_tensor([0]), node_icomp + 1))
+    id_end = torch.cat((node_icomp + 1, torch.as_tensor([len(sort_by)])))
+
+    n_points = len(percentage)
+    new_X = torch.zeros((n_types * n_points, X.shape[1]))
+    new_y = torch.zeros((n_types * n_points))
+    for i in range(n_types):
+        ids = sort_by[id_start[i] : id_end[i]]
+        for j, p in enumerate(percentage):
+            new_y[i * n_points + j] = torch.quantile(y[ids], p, interpolation="linear")
+            new_X[i * n_points + j] = unique_comps[i]
+
+    return new_X, new_y
diff --git a/nequip/utils/unittests/conftest.py b/nequip/utils/unittests/conftest.py
index 060e5e7b..77a91930 100644
--- a/nequip/utils/unittests/conftest.py
+++ b/nequip/utils/unittests/conftest.py
@@ -133,5 +133,22 @@ def atomic_batch(nequip_dataset):
     return Batch.from_data_list([nequip_dataset[0], nequip_dataset[1]])
 
 
+@pytest.fixture(scope="function")
+def per_species_set():
+    dtype = torch.get_default_dtype()
+    torch.manual_seed(0)
+    mean_min = 1
+    mean_max = 100
+    std = 20
+    n_sample = 1000
+    n_species = 9
+    ref_mean = torch.rand((n_species)) * (mean_max - mean_min) + mean_min
+    t_mean = torch.ones((n_sample, 1)) * ref_mean.reshape([1, -1])
+    ref_std = torch.rand((n_species)) * std
+    t_std = torch.ones((n_sample, 1)) * ref_std.reshape([1, -1])
+    E = torch.normal(t_mean, t_std)
+    return ref_mean.to(dtype), ref_std.to(dtype), E.to(dtype), n_sample, n_species
+
+
 # Use debug mode
 set_irreps_debug(True)
diff --git a/setup.py b/setup.py
index 8c977e0a..cba6b51f 100644
--- a/setup.py
+++ b/setup.py
@@ -37,7 +37,6 @@
         "typing_extensions;python_version<'3.8'",  # backport of Final
         "torch-runstats>=0.2.0",
         "torch-ema>=0.3.0",
-        "scikit_learn<=1.0.1",  # for GaussianProcess for per-species statistics; 1.0.2 has a bug!
     ],
     zip_safe=True,
 )
diff --git a/tests/unit/data/test_dataset.py b/tests/unit/data/test_dataset.py
index f45e0ca8..bad796c3 100644
--- a/tests/unit/data/test_dataset.py
+++ b/tests/unit/data/test_dataset.py
@@ -31,7 +31,7 @@ def ase_file(molecules):
 
 
 MAX_ATOMIC_NUMBER: int = 5
-NATOMS = 3
+NATOMS = 10
 
 
 @pytest.fixture(scope="function")
@@ -277,16 +277,11 @@ def test_per_node_field(self, npz_dataset, fixed_field, mode, subset):
         )
         print(result)
 
-    @pytest.mark.parametrize("alpha", [1e-5, 1e-3, 0.1, 0.5])
+    @pytest.mark.parametrize("alpha", [0, 1e-3, 0.01])
     @pytest.mark.parametrize("fixed_field", [True, False])
     @pytest.mark.parametrize("full_rank", [True, False])
     @pytest.mark.parametrize("subset", [True, False])
-    @pytest.mark.parametrize(
-        "regressor", ["NormalizedGaussianProcess", "GaussianProcess"]
-    )
-    def test_per_graph_field(
-        self, npz_dataset, alpha, fixed_field, full_rank, regressor, subset
-    ):
+    def test_per_graph_field(self, npz_dataset, alpha, fixed_field, full_rank, subset):
 
         if alpha <= 1e-4 and not full_rank:
             return
@@ -308,10 +303,7 @@ def test_per_graph_field(
         del n_spec
         del Ns
 
-        if alpha == 1e-5:
-            ref_mean, ref_std, E = generate_E(N, 100, 1000, 0.0)
-        else:
-            ref_mean, ref_std, E = generate_E(N, 100, 1000, 0.5)
+        ref_mean, ref_std, E = generate_E(N, 100, 1000, 10)
 
         if subset:
             E_orig_order = torch.zeros_like(
@@ -333,7 +325,6 @@ def test_per_graph_field(
                 AtomicDataDict.TOTAL_ENERGY_KEY
                 + "per_species_mean_std": {
                     "alpha": alpha,
-                    "regressor": regressor,
                     "stride": 1,
                 }
             },
@@ -341,21 +332,18 @@ def test_per_graph_field(
 
         res = torch.matmul(N, mean.reshape([-1, 1])) - E.reshape([-1, 1])
         res2 = torch.sum(torch.square(res))
-        print("residue", alpha, res2 - ref_res2)
+        print("alpha, residue, actual residue", alpha, res2, ref_res2)
         print("mean", mean, ref_mean)
         print("diff in mean", mean - ref_mean)
         print("std", std, ref_std)
 
+        tolerance = torch.max(ref_std) * 4
         if full_rank:
-            if alpha == 1e-5:
-                assert torch.allclose(mean, ref_mean, rtol=1e-1)
-            else:
-                assert torch.allclose(mean, ref_mean, rtol=1)
-                assert torch.allclose(std, torch.zeros_like(ref_mean), atol=alpha * 100)
-        elif regressor == "NormalizedGaussianProcess":
-            assert torch.std(mean).numpy() == 0
+            assert torch.allclose(mean, ref_mean, atol=tolerance)
+            # assert torch.allclose(std, torch.zeros_like(ref_mean), atol=alpha * 100)
         else:
-            assert mean[0] == mean[1] * 2
+            assert torch.allclose(mean, mean[0], atol=tolerance)
+            # assert torch.std(mean).numpy() == 0
 
 
 class TestReload:
diff --git a/tests/unit/utils/test_gp.py b/tests/unit/utils/test_gp.py
deleted file mode 100644
index 4792b9d2..00000000
--- a/tests/unit/utils/test_gp.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import torch
-import pytest
-
-from nequip.utils.regressor import base_gp
-from sklearn.gaussian_process.kernels import DotProduct
-
-
-# @pytest.mark.parametrize("full_rank", [True, False])
-@pytest.mark.parametrize("full_rank", [False])
-@pytest.mark.parametrize("alpha", [0, 1e-3, 0.1, 1])
-def test_random(full_rank, alpha):
-
-    if alpha == 0 and not full_rank:
-        return
-
-    torch.manual_seed(0)
-    n_samples = 10
-    n_dim = 3
-
-    if full_rank:
-        X = torch.randint(low=1, high=10, size=(n_samples, n_dim))
-    else:
-        X = torch.randint(low=1, high=10, size=(n_samples, 1)) * torch.ones(
-            (n_samples, n_dim)
-        )
-
-    ref_mean = torch.rand((n_dim, 1))
-    y = torch.matmul(X, ref_mean)
-
-    mean, std = base_gp(
-        X, y, DotProduct, {"sigma_0": 0, "sigma_0_bounds": "fixed"}, alpha=0.1
-    )
-
-    if full_rank:
-        assert torch.allclose(ref_mean, mean, rtol=0.5)
-    else:
-        assert torch.allclose(mean, mean[0], rtol=1e-3)
diff --git a/tests/unit/utils/test_solver.py b/tests/unit/utils/test_solver.py
new file mode 100644
index 00000000..049c897d
--- /dev/null
+++ b/tests/unit/utils/test_solver.py
@@ -0,0 +1,38 @@
+import torch
+import pytest
+
+from nequip.utils.regressor import solver
+
+
+@pytest.mark.parametrize("full_rank", [True, False])
+@pytest.mark.parametrize("alpha", [0, 1e-3, 1e-2])
+def test_random(full_rank, alpha, per_species_set):
+
+    if alpha == 0 and not full_rank:
+        return
+
+    torch.manual_seed(0)
+
+    ref_mean, ref_std, E, n_samples, n_dim = per_species_set
+
+    dtype = torch.get_default_dtype()
+
+    X = torch.randint(low=1, high=10, size=(n_samples, n_dim)).to(dtype)
+    if not full_rank:
+        X[:, n_dim - 2] = X[:, n_dim - 1] * 2
+    y = (X * E).sum(axis=-1)
+
+    mean, std = solver(X, y, alpha=alpha)
+
+    tolerance = torch.max(ref_std)
+
+    print("tolerance", tolerance)
+    print("solution", mean, std)
+    print("diff", mean - ref_mean)
+
+    if full_rank:
+        assert torch.allclose(ref_mean, mean, atol=tolerance)
+    else:
+        assert torch.allclose(mean[n_dim - 1], mean[n_dim - 2], atol=tolerance)
+
+    assert torch.max(std) < tolerance

From 4f89a431ea6e1c47f4e065943cc8968c37140792 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Sun, 11 Dec 2022 21:54:15 -0500
Subject: [PATCH 37/50] nequip-benchmark --model

---
 nequip/scripts/benchmark.py | 42 ++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py
index 5814a838..16776652 100644
--- a/nequip/scripts/benchmark.py
+++ b/nequip/scripts/benchmark.py
@@ -16,7 +16,7 @@
 from nequip.utils import Config
 from nequip.data import AtomicData, AtomicDataDict, dataset_from_config
 from nequip.model import model_from_config
-from nequip.scripts.deploy import _compile_for_deploy
+from nequip.scripts.deploy import _compile_for_deploy, load_deployed_model
 from nequip.scripts.train import default_config, check_code_version
 from nequip.utils._global_options import _set_global_options
 
@@ -28,6 +28,12 @@ def main(args=None):
         )
     )
     parser.add_argument("config", help="configuration file")
+    parser.add_argument(
+        "--model",
+        help="A deployed model to load instead of building a new one from `config`. ",
+        type=str,
+        default=None,
+    )
     parser.add_argument(
         "--profile",
         help="Profile instead of timing, creating and outputing a Chrome trace JSON to the given path.",
@@ -146,17 +152,29 @@ def main(args=None):
         return
 
     # Load model:
-    print("Building model... ")
-    model_time = time.time()
-    try:
-        model = model_from_config(config, initialize=True, dataset=dataset, deploy=True)
-    except:  # noqa: E722
-        if args.pdb:
-            pdb.post_mortem()
-        else:
-            raise
-    model_time = time.time() - model_time
-    print(f"    building model took {model_time:.4f}s")
+    if args.model is None:
+        print("Building model... ")
+        model_time = time.time()
+        try:
+            model = model_from_config(
+                config, initialize=True, dataset=dataset, deploy=True
+            )
+        except:  # noqa: E722
+            if args.pdb:
+                pdb.post_mortem()
+            else:
+                raise
+        model_time = time.time() - model_time
+        print(f"    building model took {model_time:.4f}s")
+    else:
+        print("Loading model...")
+        model, metadata = load_deployed_model(args.model, device=device, freeze=False)
+        print("    deployed model has metadata:")
+        print(
+            "\n".join(
+                "        %s: %s" % e for e in metadata.items() if e[0] != "config"
+            )
+        )
     print(f"    model has {sum(p.numel() for p in model.parameters())} weights")
     print(
         f"    model has {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable weights"

From ba214d093164f0dd38165c3974210371adf8b82c Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Sun, 11 Dec 2022 22:03:29 -0500
Subject: [PATCH 38/50] nequip-benchmark --equivariance-test

---
 nequip/scripts/benchmark.py | 40 +++++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 8 deletions(-)

diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py
index 16776652..ab95548a 100644
--- a/nequip/scripts/benchmark.py
+++ b/nequip/scripts/benchmark.py
@@ -14,6 +14,7 @@
 from e3nn.util.jit import script
 
 from nequip.utils import Config
+from nequip.utils.test import assert_AtomicData_equivariant
 from nequip.data import AtomicData, AtomicDataDict, dataset_from_config
 from nequip.model import model_from_config
 from nequip.scripts.deploy import _compile_for_deploy, load_deployed_model
@@ -40,6 +41,11 @@ def main(args=None):
         type=str,
         default=None,
     )
+    parser.add_argument(
+        "--equivariance-test",
+        help="test the model's equivariance on `--n-data` frames.",
+        action="store_true",
+    )
     parser.add_argument(
         "--device",
         help="Device to run the model on. If not provided, defaults to CUDA if available and CPU otherwise.",
@@ -111,12 +117,12 @@ def main(args=None):
     print(f"    loading dataset took {dataset_time:.4f}s")
     dataset_rng = torch.Generator()
     dataset_rng.manual_seed(config.get("dataset_seed", config.get("seed", 12345)))
-    datas = [
+    datas_list = [
         AtomicData.to_AtomicDataDict(dataset[i].to(device))
         for i in torch.randperm(len(dataset), generator=dataset_rng)[: args.n_data]
     ]
-    n_atom: int = len(datas[0]["pos"])
-    if not all(len(d["pos"]) == n_atom for d in datas):
+    n_atom: int = len(datas_list[0]["pos"])
+    if not all(len(d["pos"]) == n_atom for d in datas_list):
         raise NotImplementedError(
             "nequip-benchmark does not currently handle benchmarking on data frames with variable number of atoms"
         )
@@ -128,7 +134,7 @@ def main(args=None):
     print(f"         number of atoms: {n_atom}")
     print(f"         number of types: {dataset.type_mapper.num_types}")
     print(
-        f"          avg. num edges: {sum(d[AtomicDataDict.EDGE_INDEX_KEY].shape[1] for d in datas) / len(datas)}"
+        f"          avg. num edges: {sum(d[AtomicDataDict.EDGE_INDEX_KEY].shape[1] for d in datas_list) / len(datas_list)}"
     )
     avg_edges_per_atom = torch.mean(
         torch.cat(
@@ -137,14 +143,14 @@ def main(args=None):
                     d[AtomicDataDict.EDGE_INDEX_KEY][0],
                     minlength=d[AtomicDataDict.POSITIONS_KEY].shape[0],
                 ).float()
-                for d in datas
+                for d in datas_list
             ]
         )
     ).item()
     print(f"         avg. neigh/atom: {avg_edges_per_atom}")
 
     # cycle over the datas we loaded
-    datas = itertools.cycle(datas)
+    datas = itertools.cycle(datas_list)
 
     # short circut
     if args.n == 0:
@@ -184,6 +190,11 @@ def main(args=None):
     )
 
     model.eval()
+    if args.equivariance_test:
+        args.no_compile = True
+        if args.model is not None:
+            raise RuntimeError("Can't equivariance test a deployed model.")
+
     if args.no_compile:
         model = model.to(device)
     else:
@@ -214,7 +225,7 @@ def trace_handler(p):
             p.export_chrome_trace(args.profile)
             print(f"Wrote profiling trace to `{args.profile}`")
 
-        print("Starting...")
+        print("Starting profiling...")
         with torch.profiler.profile(
             activities=[
                 torch.profiler.ProfilerActivity.CPU,
@@ -236,6 +247,19 @@ def trace_handler(p):
         except:  # noqa: E722)
             pdb.post_mortem()
         print("Done.")
+    elif args.equivariance_test:
+        print("Running equivariance test...")
+        errstr = assert_AtomicData_equivariant(model, datas_list)
+        print(
+            "    Equivariance test passed; equivariance errors:\n"
+            "    Errors are in real units, where relevant.\n"
+            "    Please note that the large scale of the typical\n"
+            "    shifts to the (atomic) energy can cause\n"
+            "    catastrophic cancellation and give incorrectly\n"
+            "    the equivariance error as zero for those fields.\n"
+            f"{errstr}"
+        )
+        del errstr
     else:
         print("Warmup...")
         warmup_time = time.time()
@@ -244,7 +268,7 @@ def trace_handler(p):
         warmup_time = time.time() - warmup_time
         print(f"    {warmup} calls of warmup took {warmup_time:.4f}s")
 
-        print("Starting...")
+        print("Benchmarking...")
         # just time
         t = Timer(
             stmt="model(next(datas).copy())", globals={"model": model, "datas": datas}

From a4d7b3a1e26f64b5bdedfe96bbff1e9c8a134b25 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Sun, 11 Dec 2022 22:27:20 -0500
Subject: [PATCH 39/50] update config

---
 configs/minimal_toy_emt.yaml | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/configs/minimal_toy_emt.yaml b/configs/minimal_toy_emt.yaml
index c9c904d1..38b7f95d 100644
--- a/configs/minimal_toy_emt.yaml
+++ b/configs/minimal_toy_emt.yaml
@@ -6,15 +6,18 @@ dataset_seed: 456
 
 # network
 model_builders:
+  - SimpleIrrepsConfig
   - EnergyModel
   - PerSpeciesRescale
   - StressForceOutput
   - RescaleEnergyEtc
+
 num_basis: 8
 r_max: 4.0
-irreps_edge_sh: 0e + 1o
-conv_to_output_hidden_irreps_out: 16x0e
-feature_irreps_hidden: 16x0o + 16x0e + 16x1o + 16x1e
+l_max: 1
+parity: true
+num_features: 16
+num_layers: 4
 
 # data set
 dataset: EMTTest                                                                       # type of data set, can be npz or ase
@@ -23,10 +26,6 @@ dataset_num_frames: 100
 chemical_symbols:
   - Cu
 
-global_rescale_scale: dataset_total_energy_std
-per_species_rescale_shifts: dataset_per_atom_total_energy_mean
-per_species_rescale_scales: dataset_per_atom_total_energy_std
-
 # logging
 wandb: false
 # verbose: debug

From 838e164f309cc056fb44e560d99ddd34fe8decdc Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Wed, 14 Dec 2022 23:43:42 -0500
Subject: [PATCH 40/50] better nequip-deploy info

---
 CHANGELOG.md             |  1 +
 nequip/scripts/deploy.py | 30 ++++++++++++++++++++++++------
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 67bd0c81..1a7bb63c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ Most recent change on the bottom.
 - `nequip-benchmark` and `nequip-train` report number of weights and number of trainable weights
 - `nequip-benchmark --no-compile` and `--verbose` and `--memory-summary`
 - `nequip-benchmark --pdb` for debugging model (builder) errors
+- More information in `nequip-deploy info`
 
 ### Fixed
 - Git utilities when installed as ZIPed `.egg` (#264)
diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py
index 4d7686e1..394c0005 100644
--- a/nequip/scripts/deploy.py
+++ b/nequip/scripts/deploy.py
@@ -9,6 +9,7 @@
 import pathlib
 import logging
 import yaml
+import itertools
 
 # This is a weird hack to avoid Intel MKL issues on the cluster when this is called as a subprocess of a process that has itself initialized PyTorch.
 # Since numpy gets imported later anyway for dataset stuff, this shouldn't affect performance.
@@ -146,6 +147,11 @@ def main(args=None):
         help="Path to a deployed model file.",
         type=pathlib.Path,
     )
+    info_parser.add_argument(
+        "--print-config",
+        help="Print the full config of the model.",
+        action="store_true",
+    )
 
     build_parser = subparsers.add_parser("build", help="Build a deployment model")
     build_parser.add_argument(
@@ -169,13 +175,25 @@ def main(args=None):
     logging.basicConfig(level=getattr(logging, args.verbose.upper()))
 
     if args.command == "info":
-        model, metadata = load_deployed_model(args.model_path, set_global_options=False)
-        del model
+        model, metadata = load_deployed_model(
+            args.model_path, set_global_options=False, freeze=False
+        )
         config = metadata.pop(CONFIG_KEY)
-        metadata_str = "\n".join("  %s: %s" % e for e in metadata.items())
-        logging.info(f"Loaded TorchScript model with metadata:\n{metadata_str}\n")
-        logging.info("Model was built with config:")
-        print(config)
+        if args.print_config:
+            print(config)
+        else:
+            metadata_str = "\n".join("  %s: %s" % e for e in metadata.items())
+            logging.info(f"Loaded TorchScript model with metadata:\n{metadata_str}\n")
+            logging.info(
+                f"Model has {sum(p.numel() for p in model.parameters())} weights"
+            )
+            logging.info(
+                f"Model has {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable weights"
+            )
+            logging.info(
+                f"Model weights and buffers take {sum(p.numel() * p.element_size() for p in itertools.chain(model.parameters(), model.buffers())) / (1024 * 1024):.2f} MB"
+            )
+            logging.debug(f"Model had config:\n{config}")
 
     elif args.command == "build":
         if args.model and args.train_dir:

From 4691fd6cb1c0f183c4360c0e8aac30810dd4cc17 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Thu, 15 Dec 2022 10:54:08 -0700
Subject: [PATCH 41/50] fix benchmark pdb

---
 nequip/scripts/benchmark.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py
index ab95548a..38690e33 100644
--- a/nequip/scripts/benchmark.py
+++ b/nequip/scripts/benchmark.py
@@ -6,6 +6,7 @@
 import logging
 import sys
 import pdb
+import traceback
 
 import torch
 from torch.utils.benchmark import Timer, Measurement
@@ -167,6 +168,7 @@ def main(args=None):
             )
         except:  # noqa: E722
             if args.pdb:
+                traceback.print_exc()
                 pdb.post_mortem()
             else:
                 raise
@@ -244,7 +246,8 @@ def trace_handler(p):
         try:
             for _ in range(args.n):
                 model(next(datas).copy())
-        except:  # noqa: E722)
+        except:  # noqa: E722
+            traceback.print_exc()
             pdb.post_mortem()
         print("Done.")
     elif args.equivariance_test:

From 7faad2386ee74cd14408d6d58ccfe3a24cfd2411 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Thu, 15 Dec 2022 22:18:10 -0500
Subject: [PATCH 42/50] print complete information during equivariance failures

---
 CHANGELOG.md         |   4 ++
 nequip/utils/test.py | 114 +++++++++++++++++--------------------------
 setup.py             |   2 +-
 3 files changed, 49 insertions(+), 71 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1a7bb63c..20146a16 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,10 @@ Most recent change on the bottom.
 - `nequip-benchmark --pdb` for debugging model (builder) errors
 - More information in `nequip-deploy info`
 
+### Changed
+- Minimum e3nn is now 0.4.4
+- `--equivariance-test` now prints much more information, especially when there is a failure
+
 ### Fixed
 - Git utilities when installed as ZIPed `.egg` (#264)
 
diff --git a/nequip/utils/test.py b/nequip/utils/test.py
index edf2c1e8..60e68730 100644
--- a/nequip/utils/test.py
+++ b/nequip/utils/test.py
@@ -28,7 +28,7 @@ def assert_permutation_equivariant(
     data_in: AtomicDataDict.Type,
     tolerance: Optional[float] = None,
     raise_error: bool = True,
-):
+) -> str:
     r"""Test the permutation equivariance of ``func``.
 
     Standard fields are assumed to be equivariant to node or edge permutations according to their standard interpretions; all other fields are assumed to be invariant to all permutations. Non-standard fields can be registered as node/edge permutation equivariant using ``register_fields``.
@@ -93,38 +93,42 @@ def assert_permutation_equivariant(
         out_perm.keys()
     ), "Permutation changed the set of fields returned by model"
 
-    problems = []
+    messages = []
+    num_problems: int = 0
     for k in out_orig.keys():
         if k in node_permute_fields:
-            if not torch.allclose(out_orig[k][node_perm], out_perm[k], atol=atol):
-                err = (out_orig[k][node_perm] - out_perm[k]).abs().max()
-                problems.append(
-                    f"node permutation equivariance violated for field {k}; maximum componentwise error: {err:e}"
-                )
+            err = (out_orig[k][node_perm] - out_perm[k]).abs().max()
+            fail = not torch.allclose(out_orig[k][node_perm], out_perm[k], atol=atol)
+            if fail:
+                num_problems += 1
+            messages.append(
+                f"   node permutation equivariance of field {k:20}       -> max error={err:.3e}{'  FAIL' if fail else ''}"
+            )
         elif k in edge_permute_fields:
-            if not torch.allclose(out_orig[k][edge_perm], out_perm[k], atol=atol):
-                err = (out_orig[k][edge_perm] - out_perm[k]).abs().max()
-                problems.append(
-                    f"edge permutation equivariance violated for field {k}; maximum componentwise error: {err:e}"
-                )
+            err = (out_orig[k][edge_perm] - out_perm[k]).abs().max()
+            fail = not torch.allclose(out_orig[k][edge_perm], out_perm[k], atol=atol)
+            if fail:
+                num_problems += 1
+            messages.append(
+                f"   edge permutation equivariance of field {k:20}       -> max error={err:.3e}{'  FAIL' if fail else ''}"
+            )
         elif k == AtomicDataDict.EDGE_INDEX_KEY:
             pass
         else:
             # Assume invariant
             if out_orig[k].dtype == torch.bool:
-                if not torch.all(out_orig[k] == out_perm[k]):
-                    problems.append(
-                        f"edge/node permutation invariance violated for field {k} ({k} was assumed to be invariant, should it have been marked as equivariant?)"
-                    )
+                err = (out_orig[k] != out_perm[k]).max()
             else:
-                if not torch.allclose(out_orig[k], out_perm[k], atol=atol):
-                    err = (out_orig[k] - out_perm[k]).abs().max()
-                    problems.append(
-                        f"edge/node permutation invariance violated for field {k}; maximum componentwise error: {err:e}. (`{k}` was assumed to be invariant, should it have been marked as equivariant?)"
-                    )
-    msg = "\n".join(problems)
-    if len(problems) == 0:
-        return
+                err = (out_orig[k] - out_perm[k]).abs().max()
+            fail = not torch.allclose(out_orig[k], out_perm[k], atol=atol)
+            if fail:
+                num_problems += 1
+            messages.append(
+                f"   edge & node permutation invariance for field {k:20} -> max error={err:.3e}{'  FAIL' if fail else ''}"
+            )
+    msg = "\n".join(messages)
+    if num_problems == 0:
+        return msg
     else:
         if raise_error:
             raise AssertionError(msg)
@@ -169,7 +173,7 @@ def assert_AtomicData_equivariant(
 
     # == Test permutation of graph nodes ==
     # since permutation is discrete and should not be data dependent, run only on one frame.
-    permutation_problems = assert_permutation_equivariant(
+    permutation_message = assert_permutation_equivariant(
         func, data_in[0], tolerance=permutation_tolerance, raise_error=False
     )
 
@@ -255,53 +259,23 @@ def wrapper(*args):
 
     if o3_tolerance is None:
         o3_tolerance = FLOAT_TOLERANCE[torch.get_default_dtype()]
-    anerr = next(iter(errs.values()))
-    if isinstance(anerr, float) or anerr.ndim == 0:
-        # old e3nn doesn't report which key
-        problems = {k: v for k, v in errs.items() if v > o3_tolerance}
-
-        def _describe(errors):
-            return (
-                permutation_problems + "\n" if permutation_problems is not None else ""
-            ) + "\n".join(
-                "(parity_k={:d}, did_translate={}) -> max error={:.3e}".format(
-                    int(k[0]),
-                    bool(k[1]),
-                    float(v),
-                )
-                for k, v in errors.items()
-            )
-
-        if len(problems) > 0 or permutation_problems is not None:
-            raise AssertionError(
-                "Equivariance test failed for cases:" + _describe(problems)
-            )
-
-        return _describe(errs)
-    else:
-        # it's newer and tells us which is which
-        all_errs = []
-        for case, err in errs.items():
-            for key, this_err in zip(irreps_out.keys(), err):
-                all_errs.append(case + (key, this_err))
-        problems = [e for e in all_errs if e[-1] > o3_tolerance]
-
-        def _describe(errors):
-            return (
-                permutation_problems + "\n" if permutation_problems is not None else ""
-            ) + "\n".join(
-                "   (parity_k={:1d}, did_translate={:5}, field={:20}) -> max error={:.3e}".format(
-                    int(k[0]), str(bool(k[1])), str(k[2]), float(k[3])
-                )
-                for k in errors
-            )
+    all_errs = []
+    for case, err in errs.items():
+        for key, this_err in zip(irreps_out.keys(), err):
+            all_errs.append(case + (key, this_err))
+    is_problem = [e[-1] > o3_tolerance for e in all_errs]
+
+    message = (permutation_message + "\n") + "\n".join(
+        "   (parity_k={:1d}, did_translate={:5}, field={:20})     -> max error={:.3e}".format(
+            int(k[0]), str(bool(k[1])), str(k[2]), float(k[3])
+        )
+        for k, prob in zip(all_errs, is_problem)
+    )
 
-        if len(problems) > 0 or permutation_problems is not None:
-            raise AssertionError(
-                "Equivariance test failed for cases:\n" + _describe(problems)
-            )
+    if sum(is_problem) > 0 or "FAIL" in permutation_message:
+        raise AssertionError(f"Equivariance test failed for cases:\n{message}")
 
-        return _describe(all_errs)
+    return message
 
 
 _DEBUG_HOOKS = None
diff --git a/setup.py b/setup.py
index cba6b51f..7583dea3 100644
--- a/setup.py
+++ b/setup.py
@@ -30,7 +30,7 @@
         "ase",
         "tqdm",
         "torch>=1.8,<=1.12,!=1.9.0",  # torch.fx added in 1.8
-        "e3nn>=0.3.5,<0.6.0",
+        "e3nn>=0.4,4,<0.6.0",
         "pyyaml",
         "contextlib2;python_version<'3.7'",  # backport of nullcontext
         'contextvars;python_version<"3.7"',  # backport of contextvars for savenload

From 5a33ecc06d4d83efd2073b6da2dc5a8cdda07c28 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Thu, 15 Dec 2022 22:18:25 -0500
Subject: [PATCH 43/50] warmup to test equivariance of JITed model

---
 nequip/scripts/benchmark.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/nequip/scripts/benchmark.py b/nequip/scripts/benchmark.py
index 38690e33..1deb0de2 100644
--- a/nequip/scripts/benchmark.py
+++ b/nequip/scripts/benchmark.py
@@ -251,6 +251,12 @@ def trace_handler(p):
             pdb.post_mortem()
         print("Done.")
     elif args.equivariance_test:
+        print("Warmup...")
+        warmup_time = time.time()
+        for _ in range(warmup):
+            model(next(datas).copy())
+        warmup_time = time.time() - warmup_time
+        print(f"    {warmup} calls of warmup took {warmup_time:.4f}s")
         print("Running equivariance test...")
         errstr = assert_AtomicData_equivariant(model, datas_list)
         print(

From c42f3d2285dbe7d9ff315ae16940452bf0fbe26b Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Fri, 16 Dec 2022 02:04:50 -0500
Subject: [PATCH 44/50] typo

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 7583dea3..24d37e24 100644
--- a/setup.py
+++ b/setup.py
@@ -30,7 +30,7 @@
         "ase",
         "tqdm",
         "torch>=1.8,<=1.12,!=1.9.0",  # torch.fx added in 1.8
-        "e3nn>=0.4,4,<0.6.0",
+        "e3nn>=0.4.4,<0.6.0",
         "pyyaml",
         "contextlib2;python_version<'3.7'",  # backport of nullcontext
         'contextvars;python_version<"3.7"',  # backport of contextvars for savenload

From 0e0f4ee278ac5f363ef1db61fb6553fbe2fdd8f0 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Sun, 18 Dec 2022 23:12:35 -0700
Subject: [PATCH 45/50] bump

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 20146a16..cf50972d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 Most recent change on the bottom.
 
 
-## [Unreleased] - 0.5.6
+## [0.5.6] - 2022-12-19
 ### Added
 - sklearn dependency removed
 - `nequip-benchmark` and `nequip-train` report number of weights and number of trainable weights

From 0f22b7b7d171cbf15aa801d083c3899568ea42c6 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 19 Dec 2022 14:00:35 -0700
Subject: [PATCH 46/50] PyTorch 1.10 compatibility

---
 nequip/utils/regressor.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/nequip/utils/regressor.py b/nequip/utils/regressor.py
index 30c8f9ab..9db6c86f 100644
--- a/nequip/utils/regressor.py
+++ b/nequip/utils/regressor.py
@@ -66,7 +66,13 @@ def down_sampling_by_composition(
     for i in range(n_types):
         ids = sort_by[id_start[i] : id_end[i]]
         for j, p in enumerate(percentage):
-            new_y[i * n_points + j] = torch.quantile(y[ids], p, interpolation="linear")
+            # We comment out `interpolation="linear"` for PyTorch 1.10,
+            # where linear is the default but the `interpolation` option
+            # does not exist:
+            # https://pytorch.org/docs/1.10/generated/torch.quantile.html?highlight=quantile#torch.quantile
+            # https://pytorch.org/docs/1.13/generated/torch.quantile.html?highlight=quantile#torch.quantile
+            # previously, call had `, interpolation="linear")`
+            new_y[i * n_points + j] = torch.quantile(y[ids], p)
             new_X[i * n_points + j] = unique_comps[i]
 
     return new_X, new_y

From 90fd9ed4bc604b3055109834d9d65df9b6b5b87d Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 19 Dec 2022 22:25:47 -0700
Subject: [PATCH 47/50] version bumps

---
 .github/workflows/tests.yml         | 2 +-
 .github/workflows/tests_develop.yml | 2 +-
 nequip/utils/regressor.py           | 8 +-------
 setup.py                            | 2 +-
 4 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index a6e9a8f0..1f835e90 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -16,7 +16,7 @@ jobs:
     strategy:
       matrix:
         python-version: [3.9]
-        torch-version: [1.10.1, 1.11.0]
+        torch-version: [1.11.0, 1.12.1]
 
     steps:
     - uses: actions/checkout@v2
diff --git a/.github/workflows/tests_develop.yml b/.github/workflows/tests_develop.yml
index e301f5e3..2c23350c 100644
--- a/.github/workflows/tests_develop.yml
+++ b/.github/workflows/tests_develop.yml
@@ -16,7 +16,7 @@ jobs:
     strategy:
       matrix:
         python-version: [3.9]
-        torch-version: [1.11.0]
+        torch-version: [1.12.1]
 
     steps:
     - uses: actions/checkout@v2
diff --git a/nequip/utils/regressor.py b/nequip/utils/regressor.py
index 9db6c86f..1e4e9015 100644
--- a/nequip/utils/regressor.py
+++ b/nequip/utils/regressor.py
@@ -66,13 +66,7 @@ def down_sampling_by_composition(
     for i in range(n_types):
         ids = sort_by[id_start[i] : id_end[i]]
         for j, p in enumerate(percentage):
-            # We comment out `interpolation="linear"` for PyTorch 1.10,
-            # where linear is the default but the `interpolation` option
-            # does not exist:
-            # https://pytorch.org/docs/1.10/generated/torch.quantile.html?highlight=quantile#torch.quantile
-            # https://pytorch.org/docs/1.13/generated/torch.quantile.html?highlight=quantile#torch.quantile
-            # previously, call had `, interpolation="linear")`
-            new_y[i * n_points + j] = torch.quantile(y[ids], p)
+            new_y[i * n_points + j] = torch.quantile(y[ids], p, interpolation="linear"))
             new_X[i * n_points + j] = unique_comps[i]
 
     return new_X, new_y
diff --git a/setup.py b/setup.py
index 24d37e24..d7a5b465 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
         "numpy",
         "ase",
         "tqdm",
-        "torch>=1.8,<=1.12,!=1.9.0",  # torch.fx added in 1.8
+        "torch>=1.10.0,<1.13,!=1.9.0",
         "e3nn>=0.4.4,<0.6.0",
         "pyyaml",
         "contextlib2;python_version<'3.7'",  # backport of nullcontext

From 82dbb9e8d7517d55267875f5279ef608e5ef8a9e Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 19 Dec 2022 22:29:53 -0700
Subject: [PATCH 48/50] typo

---
 nequip/utils/regressor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nequip/utils/regressor.py b/nequip/utils/regressor.py
index 1e4e9015..30c8f9ab 100644
--- a/nequip/utils/regressor.py
+++ b/nequip/utils/regressor.py
@@ -66,7 +66,7 @@ def down_sampling_by_composition(
     for i in range(n_types):
         ids = sort_by[id_start[i] : id_end[i]]
         for j, p in enumerate(percentage):
-            new_y[i * n_points + j] = torch.quantile(y[ids], p, interpolation="linear"))
+            new_y[i * n_points + j] = torch.quantile(y[ids], p, interpolation="linear")
             new_X[i * n_points + j] = unique_comps[i]
 
     return new_X, new_y

From 4bb345d002153794cf2b9644e6052a3894cd5859 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 19 Dec 2022 23:27:07 -0700
Subject: [PATCH 49/50] don't use `manual_seed` globally

---
 nequip/utils/unittests/conftest.py |  8 ++++----
 tests/unit/data/test_dataset.py    | 14 ++++++++------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/nequip/utils/unittests/conftest.py b/nequip/utils/unittests/conftest.py
index 77a91930..4cfa98ff 100644
--- a/nequip/utils/unittests/conftest.py
+++ b/nequip/utils/unittests/conftest.py
@@ -136,17 +136,17 @@ def atomic_batch(nequip_dataset):
 @pytest.fixture(scope="function")
 def per_species_set():
     dtype = torch.get_default_dtype()
-    torch.manual_seed(0)
+    rng = torch.Generator().manual_seed(127)
     mean_min = 1
     mean_max = 100
     std = 20
     n_sample = 1000
     n_species = 9
-    ref_mean = torch.rand((n_species)) * (mean_max - mean_min) + mean_min
+    ref_mean = torch.rand((n_species), generator=rng) * (mean_max - mean_min) + mean_min
     t_mean = torch.ones((n_sample, 1)) * ref_mean.reshape([1, -1])
-    ref_std = torch.rand((n_species)) * std
+    ref_std = torch.rand((n_species), generator=rng) * std
     t_std = torch.ones((n_sample, 1)) * ref_std.reshape([1, -1])
-    E = torch.normal(t_mean, t_std)
+    E = torch.normal(t_mean, t_std, generator=rng)
     return ref_mean.to(dtype), ref_std.to(dtype), E.to(dtype), n_sample, n_species
 
 
diff --git a/tests/unit/data/test_dataset.py b/tests/unit/data/test_dataset.py
index bad796c3..95cfe48d 100644
--- a/tests/unit/data/test_dataset.py
+++ b/tests/unit/data/test_dataset.py
@@ -231,8 +231,8 @@ def test_per_graph_field(self, npz_dataset, fixed_field, subset, key, dim):
         if npz_dataset is None:
             return
 
-        torch.manual_seed(0)
-        E = torch.rand((npz_dataset.len(),) + dim)
+        rng = torch.Generator().manual_seed(454)
+        E = torch.rand((npz_dataset.len(),) + dim, generator=rng)
         ref_mean = torch.mean(E / NATOMS, dim=0)
         ref_std = torch.std(E / NATOMS, dim=0)
 
@@ -437,12 +437,14 @@ def test_from_atoms(self, molecules):
 
 
 def generate_E(N, mean_min, mean_max, std):
-    torch.manual_seed(0)
-    ref_mean = torch.rand((N.shape[1])) * (mean_max - mean_min) + mean_min
+    rng = torch.Generator().manual_seed(568)
+    ref_mean = (
+        torch.rand((N.shape[1]), generator=rng) * (mean_max - mean_min) + mean_min
+    )
     t_mean = torch.ones((N.shape[0], 1)) * ref_mean.reshape([1, -1])
-    ref_std = torch.rand((N.shape[1])) * std
+    ref_std = torch.rand((N.shape[1]), generator=rng) * std
     t_std = torch.ones((N.shape[0], 1)) * ref_std.reshape([1, -1])
-    E = torch.normal(t_mean, t_std)
+    E = torch.normal(t_mean, t_std, generator=rng)
     return ref_mean, ref_std, (N * E).sum(axis=-1)
 
 

From 5a365e061019e8dae32bdb31dc830e378fe758ba Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 19 Dec 2022 23:27:59 -0700
Subject: [PATCH 50/50] always solve in float64

---
 nequip/utils/regressor.py       | 14 +++++++++-----
 tests/unit/utils/test_solver.py |  8 ++++----
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/nequip/utils/regressor.py b/nequip/utils/regressor.py
index 30c8f9ab..76d140bc 100644
--- a/nequip/utils/regressor.py
+++ b/nequip/utils/regressor.py
@@ -8,8 +8,10 @@
 
 
 def solver(X, y, alpha: Optional[float] = 0.001, stride: Optional[int] = 1, **kwargs):
-
-    dtype = torch.get_default_dtype()
+    # results are in the same "units" as y, so same dtype too:
+    dtype_out = y.dtype
+    # always solve in float64 for numerical stability
+    dtype = torch.float64
     X = X[::stride].to(dtype)
     y = y[::stride].to(dtype)
 
@@ -40,7 +42,7 @@ def solver(X, y, alpha: Optional[float] = 0.001, stride: Optional[int] = 1, **kw
 
     logging.debug(f"Ridge Regression, residue {sigma2}")
 
-    return mean, cov
+    return mean.to(dtype_out), cov.to(dtype_out)
 
 
 def down_sampling_by_composition(
@@ -61,8 +63,10 @@ def down_sampling_by_composition(
     id_end = torch.cat((node_icomp + 1, torch.as_tensor([len(sort_by)])))
 
     n_points = len(percentage)
-    new_X = torch.zeros((n_types * n_points, X.shape[1]))
-    new_y = torch.zeros((n_types * n_points))
+    new_X = torch.zeros(
+        (n_types * n_points, X.shape[1]), dtype=X.dtype, device=X.device
+    )
+    new_y = torch.zeros((n_types * n_points), dtype=y.dtype, device=y.device)
     for i in range(n_types):
         ids = sort_by[id_start[i] : id_end[i]]
         for j, p in enumerate(percentage):
diff --git a/tests/unit/utils/test_solver.py b/tests/unit/utils/test_solver.py
index 049c897d..de78cbd8 100644
--- a/tests/unit/utils/test_solver.py
+++ b/tests/unit/utils/test_solver.py
@@ -11,13 +11,13 @@ def test_random(full_rank, alpha, per_species_set):
     if alpha == 0 and not full_rank:
         return
 
-    torch.manual_seed(0)
+    rng = torch.Generator().manual_seed(343)
 
     ref_mean, ref_std, E, n_samples, n_dim = per_species_set
 
-    dtype = torch.get_default_dtype()
-
-    X = torch.randint(low=1, high=10, size=(n_samples, n_dim)).to(dtype)
+    X = torch.randint(low=1, high=10, size=(n_samples, n_dim), generator=rng).to(
+        torch.get_default_dtype()
+    )
     if not full_rank:
         X[:, n_dim - 2] = X[:, n_dim - 1] * 2
     y = (X * E).sum(axis=-1)