metatensor · hurricane642 · Nov 24, 2022 · Dec 19, 2022 · Dec 23, 2022
diff --git a/python/rascaline/transformer.py → python/rascaline/properties_selector.py b/python/rascaline/transformer.py → python/rascaline/properties_selector.py
@@ -3,13 +3,13 @@
 from equistore import Labels, TensorBlock, TensorMap
 
 
-class Transformer:
-    """The 'Transformer' class makes it easy to create a representation matrix
+class PropertiesSelector:
+    """The 'PropertiesSelector' class makes it easy to create a representation matrix
     when using some other matrix as a reference. A classic use case is to create
     a TensorMap representation for a dataset, then perform transformations
     within that TensorMap (e.g., keys_to_features or keys_to_properties), and
     select the most useful features in the transformed TensorMap.
-    The 'Transformer' allows a set of these features to be used to calculate
+    The 'PropertiesSelector' allows a set of these features to be used to calculate
     a new TensorMap, thus saving computation time and maintaining a single
     representation for all representations.
 
@@ -23,27 +23,59 @@ class Transformer:
-        Two options are possible - 'keys_to_features' and 'keys_to_properties'.
+        Two options are possible - 'keys_to_samples' and 'keys_to_properties'.
         #TODO: provide the ability to pass a list of conversions that will occur
         one after the other.
-    :param moved_keys: Those keys which will be moved during the transformation.
-        This variable can accept knowledge of type str (one key), list (a list
-        of keys) and Labels (in addition to the name of the keys, pass a list
-        of keys to be moved).
+    :param calculator: an instance of the calculator that will calculate the
+        descriptor within this instance of the class.
+    :param keys_to_move: the keys that will be moved during the transformation.
+        This can be anything supported by the
+        :py:meth:`equistore.TensorMap.keys_to_properties` or
+        :py:meth:`equistore.TensorMap.keys_to_samples` methods, i.e. one
+        string, a list of strings or an instance of :py:class:`equistore.Labels`.
+    :param use_native_system: If ``True`` (this is the default), copy data
+        from the ``systems`` into Rust ``SimpleSystem``. This can be a lot
+        faster than having to cross the FFI boundary often when accessing
+        the neighbor list. Otherwise the Python neighbor list is used.
+
+    :param gradients: List of gradients to compute. If this is ``None`` or
+        an empty list ``[]``, no gradients are computed. Gradients are
+        stored inside the different blocks, and can be accessed with
+        ``descriptor.block(...).gradient(<parameter>)``, where
+        ``<parameter>`` is the name of the gradient, either ``"positions"``
+        or ``"cell"``.
     """
 
-    def __init__(self, selector, transformation=None, moved_keys=None):
+    def __init__(
+        self,
+        selector,
+        calculator,
+        transformation=None,
+        keys_to_move=None,
+        gradients=None,
+        use_native_system=True,
+    ):
         #
-        self.selector = selector
-        self.transformation = transformation
-        self.moved_keys = moved_keys
+        # Settings supplied by the caller.
+        self._selector = selector
+        self._transformation = transformation
+        self._moved_keys = keys_to_move
+        self.calculator = calculator
+        self.calculator_grad = gradients
+        self.calculator_use_native_system = use_native_system
+        # State produced by fit() and its helpers; everything below stays None
+        # until fit() has been called.
+        self.transformed_leys = None
+        self._moved_keys_names = None
+        self._initial_keys = None
+        self.tensor_map = None
+        self._initial_keys_names = None
+        self.selected_tensor = None
+
+        # Only None, "keys_to_samples" and "keys_to_properties" are accepted
+        # as `transformation`.
         if (
-            (self.transformation is not None)
-            and (self.transformation != "keys_to_samples")
-            and (self.transformation != "keys_to_properties")
+            (self._transformation is not None)
+            and (self._transformation != "keys_to_samples")
+            and (self._transformation != "keys_to_properties")
         ):
             raise ValueError(
                 "`transformation` parameter should be either `keys_to_samples`,"
-                f" either `keys_to_properties`, got {self.transformation}"
+                f" either `keys_to_properties`, got {self._transformation}"
             )
-        if (self.transformation is None) and (self.moved_keys is not None):
+        # Keys to move are only meaningful together with a transformation.
+        if (self._transformation is None) and (self._moved_keys is not None):
             raise ValueError("unable to shift keys: unknown transformation type")
 
     def _copy(self, tensor_map):
@@ -55,77 +87,79 @@ def _copy(self, tensor_map):
             blocks.append(block.copy())
         return TensorMap(tensor_map.keys, blocks)
 
-    def keys_definition(self):
+    def _keys_definition(self):
         """This is another internal function that performs two main tasks.
         First, it converts all moved_keys to the same format.  What is
         meant is that further we need the names of the keys we are going
         to move, as well as the 'TensorMap' keys, which will be passed
         to the compute function as a reference at the end. This function
         stores the names of the moved keys in the 'moved_keys_names' array,
-        and stores the keys of the final TensorMap reference in 'final_keys'.
+        and stores the keys of the final TensorMap reference in 'transformed_leys'.
+        When no keys are moved ('keys_to_move' is None), the list of names is
+        empty and the initial keys are kept unchanged.
         """
         # the first 2 cases are simple - we either copy the moved_keys directly,
         # or create an array based on them, and simply take all the keys passed
         # in the fit TensorMap step as the final keys.
-        if isinstance(self.moved_keys, str):
-            self.moved_keys_names = [self.moved_keys.copy()]
-            self.final_keys = self._old_keys
-        elif isinstance(self.moved_keys, list):
-            self.moved_keys_names = self.moved_keys.copy()
-            self.final_keys = self._old_keys
+        if self._moved_keys is None:
+            # Nothing is moved (no transformation was requested). Without this
+            # branch the default configuration would fall through to the
+            # Labels-only branch below and fail there.
+            self._moved_keys_names = []
+            self.transformed_leys = self._initial_keys
+        elif isinstance(self._moved_keys, str):
+            # a single key name: wrap it in a list (strings have no .copy())
+            self._moved_keys_names = [self._moved_keys]
+            self.transformed_leys = self._initial_keys
+        elif isinstance(self._moved_keys, list):
+            self._moved_keys_names = self._moved_keys.copy()
+            self.transformed_leys = self._initial_keys
         else:
+            assert isinstance(self._moved_keys, Labels)
+
             # The third case is a little more complicated.
             # First, we save the names of the moved keys,
             # taking them from Labels 'moved_keys'.
-            self.moved_keys_names = self.moved_keys.names
+            self._moved_keys_names = self._moved_keys.names
             names = []
             new_keys = []
             # Let's write down the order of the keys we will have during the
             # course of the algorithm in the 'names'
             names.extend(self.tensor_map.keys.names)
-            names.extend(self.moved_keys_names)
+            names.extend(self._moved_keys_names)
             # Now let's generate reference TensorMap keys. They will consist of
             # two parts - those keys that were left after transformation, and
             # those keys that were in the values of the variable moved_keys.
             # Go through them and create all possible combinations of these
             # parts.
             for key in self.tensor_map.keys:
-                for value in self.moved_keys:
+                for value in self._moved_keys:
                     clue = [k.copy() for k in key]
                     clue.extend(value)
                     new_keys.append(clue)
             # The keys have been listed in random order, let's arrange them and
-            # store the values in 'final_keys'.
+            # store the values in 'transformed_leys'.
             indices = []
-            for key in self._old_keys_names:
+            for key in self._initial_keys_names:
                 indices.append(names.index(key))
             ordered_keys = []
             for el in new_keys:
                 key = [el[i] for i in indices]
                 ordered_keys.append(key)
-            self.final_keys = Labels(
-                names=self._old_keys_names, values=np.array(ordered_keys)
+            self.transformed_leys = Labels(
+                names=self._initial_keys_names, values=np.array(ordered_keys)
             )
 
     def _mover(self, tensor_map):
         # Internal function that does the transformation of the reference
         # Tensormap.
-        self._old_keys = tensor_map.keys
-        self._old_keys_names = tensor_map.keys.names
+        # Remember the keys (and their names) as they were before any move;
+        # they are read again later when the reference keys are rebuilt.
+        self._initial_keys = tensor_map.keys
+        self._initial_keys_names = tensor_map.keys.names
         tensor_copy = self._copy(tensor_map)
-        if self.transformation is not None:
-            if self.transformation == "keys_to_samples":
-                tensor_copy.keys_to_samples(self.moved_keys)
-            elif self.transformation == "keys_to_properties":
-                tensor_copy.keys_to_properties(self.moved_keys)
+        # NOTE(review): the return value of keys_to_samples/keys_to_properties
+        # is not used, so this presumably relies on them modifying
+        # `tensor_copy` in place - confirm against the equistore version in use.
+        if self._transformation is not None:
+            if self._transformation == "keys_to_samples":
+                tensor_copy.keys_to_samples(self._moved_keys)
+            elif self._transformation == "keys_to_properties":
+                tensor_copy.keys_to_properties(self._moved_keys)
         return tensor_copy
 
-    def properties_selection(self):
+    def _properties_selection(self):
         # This function selects properties according to a preset algorithm
         # within each 'TensorMap' block
         blocks = []
         for _, block in self.tensor_map:
-            mask = self.selector.fit(block.values).get_support()
+            mask = self._selector.fit(block.values).get_support()
             selected_properties = block.properties[mask]
             blocks.append(
                 TensorBlock(
@@ -141,36 +175,43 @@ def properties_selection(self):
 
         self.selected_tensor = TensorMap(self.tensor_map.keys, blocks)
 
-    def fit(self, tensor_map):
+    def fit(self, reference_frames):
         """The fit function tells the transformer which attributes to use when
         creating new representations.
 
         Parameters:
         -----------
-        :param tensor_map: reference TensorMap, with which transformations are
-            carried out and in which properties are selected.
+        :param reference_frames: reference frames. Their representation is
+            computed with the stored calculator, the configured transformation
+            is applied to it, and the properties are selected in the result.
         """
+        # Compute the reference representation with the settings given to the
+        # constructor.
+        tensor_map = self.calculator.compute(
+            systems=reference_frames,
+            gradients=self.calculator_grad,
+            use_native_system=self.calculator_use_native_system,
+        )
         self.tensor_map = self._mover(tensor_map)
-        self.keys_definition()
-        self.properties_selection()
+        # Order matters: both helpers read `self.tensor_map` set just above.
+        self._keys_definition()
+        self._properties_selection()
 
-    def transform(self, frames, calculator):
+    def transform(self, frames):
         """A function that creates a TensorMap representation based on the
         passed frames as well as a previously performed fit.
 
         Parameters:
         -----------
-        :param frames: list with the frames to be processed during this function.
-        :param calculator: calculator that will compute the representation of
-            the transferred frames.
+        :param frames: list with the frames to be processed during this
+            function. The representation is computed with the calculator
+            given to the constructor, restricted to the properties selected
+            during ``fit``.
         """
-        if self.transformation is None:
+        if self._transformation is None:
             # trivial case - nothing happened, do the usual calculation.
-            descriptor = calculator.compute(
-                frames, selected_properties=self.selected_tensor
+            descriptor = self.calculator.compute(
+                systems=frames,
+                gradients=self.calculator_grad,
+                use_native_system=self.calculator_use_native_system,
+                # keep only the properties chosen in fit(); without this the
+                # selection would be silently ignored in the trivial case
+                selected_properties=self.selected_tensor,
             )
             return descriptor
-        elif self.transformation == "keys_to_samples":
+        elif self._transformation == "keys_to_samples":
             # In the second case the situation is a bit more complicated.
             # Suppose we originally had a set of key names {'a', 'b', 'c'}.
             # We moved key 'c' to samples. We are left with blocks with keys
@@ -182,9 +223,9 @@ def transform(self, frames, calculator):
             blocks = []
             idx = []
             # save the positions of the moved keys.
-            for key in self.moved_keys_names:
-                idx.append(self.final_keys.names.index(key))
-            for obj in self.final_keys:
+            for key in self._moved_keys_names:
+                idx.append(self.transformed_leys.names.index(key))
+            for obj in self.transformed_leys:
                 # separate the moved keys, obtain a block based on the remainder
                 obt_key = tuple(item for i, item in enumerate(obj) if i not in idx)
                 if len(obt_key) == 0:
@@ -200,15 +241,17 @@ def transform(self, frames, calculator):
                         properties=block.properties,
                     )
                 )
-            properties_tensor = TensorMap(self.final_keys, blocks)
+            properties_tensor = TensorMap(self.transformed_leys, blocks)
             # Do the final computation
-            descriptor = calculator.compute(
-                frames,
+            descriptor = self.calculator.compute(
+                systems=frames,
+                gradients=self.calculator_grad,
+                use_native_system=self.calculator_use_native_system,
                 selected_properties=properties_tensor,
-                selected_keys=self.final_keys,
+                selected_keys=self.transformed_leys,
             )
             return descriptor
-        elif self.transformation == "keys_to_properties":
+        elif self._transformation == "keys_to_properties":
             # The third case is the most complicated. Again, let's start with a
             # TensorMap with {'a', 'b', 'c'} keys. Suppose we move the 'c' keys
             # to properties. We take the final key {a_1, b_1, c_1}. Its
@@ -219,7 +262,7 @@ def transform(self, frames, calculator):
 
             # save positions of the moved keys in the properties array
             pos_in_prop = []
-            for key in self.moved_keys_names:
+            for key in self._moved_keys_names:
                 pos_in_prop.append(self.tensor_map.property_names.index(key))
             idx = []
             property_names = []
@@ -228,12 +271,12 @@ def transform(self, frames, calculator):
                 if i not in pos_in_prop:
                     property_names.append(key)
             # determine the positions of the moved keys in the final keys
-            for key in self.moved_keys_names:
-                idx.append(self.final_keys.names.index(key))
+            for key in self._moved_keys_names:
+                idx.append(self.transformed_leys.names.index(key))
             # in this dictionary we write a list of properties, which we will
             # save for each block.
             properties_dict = {}
-            for obj in self.final_keys:
+            for obj in self.transformed_leys:
                 obj_tuple = tuple(item for item in obj)
                 properties_dict[obj_tuple] = []
             # running through all the keys of the transformed tensor
@@ -257,7 +300,7 @@ def transform(self, frames, calculator):
                     key_ind = 0
                     # put the key together from the two pieces - the one you
                     # moved and the one you have left
-                    for i in range(len(self.final_keys.names)):
+                    for i in range(len(self.transformed_leys.names)):
                         if i in idx:
                             obt_key.append(add_key[add_key_ind])
                             add_key_ind += 1
@@ -269,7 +312,7 @@ def transform(self, frames, calculator):
                     properties_dict[obt_key].append(property_initial)
             blocks = []
             # go through the original keys to create a tensor for selection
-            for key in self.final_keys:
+            for key in self.transformed_leys:
                 key = tuple(key)
                 # In theory, we may find that we have not selected any property
                 # that is correspond to this block - take this into account.
@@ -287,10 +330,12 @@ def transform(self, frames, calculator):
                         properties=properties,
                     )
                 )
-            properties_tensor = TensorMap(self.final_keys, blocks)
-            descriptor = calculator.compute(
-                frames,
+            properties_tensor = TensorMap(self.transformed_leys, blocks)
+            descriptor = self.calculator.compute(
+                systems=frames,
+                gradients=self.calculator_grad,
+                use_native_system=self.calculator_use_native_system,
                 selected_properties=properties_tensor,
-                selected_keys=self.final_keys,
+                selected_keys=self.transformed_leys,
             )
             return descriptor