Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transformer class for the property selection #137

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
from equistore import Labels, TensorBlock, TensorMap


class Transformer:
"""The 'Transformer' class makes it easy to create a representation matrix
class PropertiesSelector:
"""The 'PropertiesSelector' class makes it easy to create a representation matrix
when using some other matrix as a reference. A classic use case is to create
a TensorMap representation for a dataset, then perform transformations
within that TensorMap (e.g., keys_to_features or keys_to_properties), and
select the most useful features in the transformed TensorMap.
The 'Transformer' allows a set of these features to be used to calculate
The 'PropertiesSelector' allows a set of these features to be used to calculate
a new TensorMap, thus saving computation time and maintaining a single
representation for all representations.

Expand All @@ -23,27 +23,59 @@ class Transformer:
Two options are possible - 'keys_to_features' and 'keys_to_properties'.
#TODO: provide the ability to pass a list of conversions that will occur
one after the other.
:param moved_keys: Those keys which will be moved during the transformation.
This variable can accept knowledge of type str (one key), list (a list
of keys) and Labels (in addition to the name of the keys, pass a list
of keys to be moved).
:param calculator: an instance of the calculator that will calculate the
descriptor within this instance of the class.
:param keys_to_move: Those keys which will be moved during the transformation.
This variable can be anything supported by the
:py:class:`equistore.TensorMap.keys_to_properties` or
:py:class:`equistore.TensorMap.keys_to_samples` functions, i.e. one
string, a list of strings or an instance of :py:class:`equistore.Labels`
:param use_native_system: If ``True`` (this is the default), copy data
from the ``systems`` into Rust ``SimpleSystem``. This can be a lot
faster than having to cross the FFI boundary often when accessing
the neighbor list. Otherwise the Python neighbor list is used.

:param gradients: List of gradients to compute. If this is ``None`` or
an empty list ``[]``, no gradients are computed. Gradients are
stored inside the different blocks, and can be accessed with
``descriptor.block(...).gradient(<parameter>)``, where
``<parameter>`` is ``"positions"`` or ``"cell"``.
"""

def __init__(self, selector, transformation=None, moved_keys=None):
def __init__(
self,
selector,
calculator,
transformation=None,
keys_to_move=None,
gradients=None,
use_native_system=True,
):
#
self.selector = selector
self.transformation = transformation
self.moved_keys = moved_keys
self._selector = selector
self._transformation = transformation
self._moved_keys = keys_to_move
self.calculator = calculator
self.calculator_grad = gradients
self.calculator_use_native_system = use_native_system
self.transformed_leys = None
self._moved_keys_names = None
self._initial_keys = None
self.tensor_map = None
self._initial_keys_names = None
self.selected_tensor = None

if (
(self.transformation is not None)
and (self.transformation != "keys_to_samples")
and (self.transformation != "keys_to_properties")
(self._transformation is not None)
and (self._transformation != "keys_to_samples")
and (self._transformation != "keys_to_properties")
):
raise ValueError(
"`transformation` parameter should be either `keys_to_samples`,"
f" either `keys_to_properties`, got {self.transformation}"
f" either `keys_to_properties`, got {self._transformation}"
)
if (self.transformation is None) and (self.moved_keys is not None):
if (self._transformation is None) and (self._moved_keys is not None):
raise ValueError("unable to shift keys: unknown transformation type")

def _copy(self, tensor_map):
Expand All @@ -55,77 +87,79 @@ def _copy(self, tensor_map):
blocks.append(block.copy())
return TensorMap(tensor_map.keys, blocks)

def keys_definition(self):
def _keys_definition(self):
"""This is another internal function that performs two main tasks.
First, it converts all moved_keys to the same format. Later on we need
both the names of the keys we are going to move and the 'TensorMap' keys
that will be passed to the compute function as a reference at the end.
This function
stores the names of the moved keys in the 'moved_keys_names' array,
and stores the keys of the final TensorMap reference in 'final_keys'.
and stores the keys of the final TensorMap reference in 'transformed_leys'.
"""
# the first 2 cases are simple - we either copy the moved_keys directly,
# or create an array based on them, and simply take all the keys passed
# in the fit TensorMap step as the final keys.
if isinstance(self.moved_keys, str):
self.moved_keys_names = [self.moved_keys.copy()]
self.final_keys = self._old_keys
elif isinstance(self.moved_keys, list):
self.moved_keys_names = self.moved_keys.copy()
self.final_keys = self._old_keys
if isinstance(self._moved_keys, str):
self._moved_keys_names = [self.moved_keys]
self.transformed_leys = self._initial_keys
elif isinstance(self._moved_keys, list):
self._moved_keys_names = self._moved_keys.copy()
self.transformed_leys = self._initial_keys
else:
assert isinstance(self._moved_keys, Labels)

# The third case is a little more complicated.
# First, we save the names of the moved keys,
# taking them from Labels 'moved_keys'.
self.moved_keys_names = self.moved_keys.names
self._moved_keys_names = self._moved_keys.names
names = []
new_keys = []
# Let's write down the order of the keys we will have during the
# course of the algorithm in the 'names'
names.extend(self.tensor_map.keys.names)
names.extend(self.moved_keys_names)
names.extend(self._moved_keys_names)
# Now let's generate reference TensorMap keys. They will consist of
# two parts - those keys that were left after transformation, and
# those keys that were in the values of the variable moved_keys.
# Go through them and create all possible combinations of these
# parts.
for key in self.tensor_map.keys:
for value in self.moved_keys:
for value in self._moved_keys:
clue = [k.copy() for k in key]
clue.extend(value)
new_keys.append(clue)
# The keys have been listed in random order, let's arrange them and
# store the values in 'final_keys'.
# store the values in 'transformed_leys'.
indices = []
for key in self._old_keys_names:
for key in self._initial_keys_names:
indices.append(names.index(key))
ordered_keys = []
for el in new_keys:
key = [el[i] for i in indices]
ordered_keys.append(key)
self.final_keys = Labels(
names=self._old_keys_names, values=np.array(ordered_keys)
self.transformed_leys = Labels(
names=self._initial_keys_names, values=np.array(ordered_keys)
)

def _mover(self, tensor_map):
# Internal function that does the transformation of the reference
# Tensormap.
self._old_keys = tensor_map.keys
self._old_keys_names = tensor_map.keys.names
self._initial_keys = tensor_map.keys
self._initial_keys_names = tensor_map.keys.names
tensor_copy = self._copy(tensor_map)
if self.transformation is not None:
if self.transformation == "keys_to_samples":
tensor_copy.keys_to_samples(self.moved_keys)
elif self.transformation == "keys_to_properties":
tensor_copy.keys_to_properties(self.moved_keys)
if self._transformation is not None:
if self._transformation == "keys_to_samples":
tensor_copy.keys_to_samples(self._moved_keys)
elif self._transformation == "keys_to_properties":
tensor_copy.keys_to_properties(self._moved_keys)
return tensor_copy

def properties_selection(self):
def _properties_selection(self):
# This function selects properties according to a preset algorithm
# within each 'TensorMap' block
blocks = []
for _, block in self.tensor_map:
mask = self.selector.fit(block.values).get_support()
mask = self._selector.fit(block.values).get_support()
selected_properties = block.properties[mask]
blocks.append(
TensorBlock(
Expand All @@ -141,36 +175,43 @@ def properties_selection(self):

self.selected_tensor = TensorMap(self.tensor_map.keys, blocks)

def fit(self, tensor_map):
def fit(self, reference_frames):
"""The fit function tells the transformer which attributes to use when
creating new representations.

Parameters:
-----------
:param tensor_map: reference TensorMap, with which transformations are
carried out and in which properties are selected.
:param reference_frames: reference frames, with which representation
and then transformations are carried out and in which properties
are selected.
"""
tensor_map = self.calculator.compute(
systems=reference_frames,
gradients=self.calculator_grad,
use_native_system=self.calculator_use_native_system,
)
self.tensor_map = self._mover(tensor_map)
self.keys_definition()
self.properties_selection()
self._keys_definition()
self._properties_selection()

def transform(self, frames, calculator):
def transform(self, frames):
"""A function that creates a TensorMap representation based on the
passed frames as well as a previously performed fit.

Parameters:
-----------
:param frames: list with the frames to be processed during this function.
:param calculator: calculator that will compute the representation of
the transferred frames.
:param frames: list with the frames to be processed during this
function.
"""
if self.transformation is None:
if self._transformation is None:
# trivial case - nothing happened, do the usual calculation.
descriptor = calculator.compute(
frames, selected_properties=self.selected_tensor
descriptor = self.calculator.compute(
systems=frames,
gradients=self.calculator_grad,
use_native_system=self.calculator_use_native_system,
)
return descriptor
elif self.transformation == "keys_to_samples":
elif self._transformation == "keys_to_samples":
# In the second case the situation is a bit more complicated.
# Suppose we originally had a set of key names {'a', 'b', 'c'}.
# We moved key 'c' to samples. We are left with blocks with keys
Expand All @@ -182,9 +223,9 @@ def transform(self, frames, calculator):
blocks = []
idx = []
# save the positions of the moved keys.
for key in self.moved_keys_names:
idx.append(self.final_keys.names.index(key))
for obj in self.final_keys:
for key in self._moved_keys_names:
idx.append(self.transformed_leys.names.index(key))
for obj in self.transformed_leys:
# separate the moved keys, obtain a block based on the remainder
obt_key = tuple(item for i, item in enumerate(obj) if i not in idx)
if len(obt_key) == 0:
Expand All @@ -200,15 +241,17 @@ def transform(self, frames, calculator):
properties=block.properties,
)
)
properties_tensor = TensorMap(self.final_keys, blocks)
properties_tensor = TensorMap(self.transformed_leys, blocks)
# Do the final computation
descriptor = calculator.compute(
frames,
descriptor = self.calculator.compute(
systems=frames,
gradients=self.calculator_grad,
use_native_system=self.calculator_use_native_system,
selected_properties=properties_tensor,
selected_keys=self.final_keys,
selected_keys=self.transformed_leys,
)
return descriptor
elif self.transformation == "keys_to_properties":
elif self._transformation == "keys_to_properties":
# The third case is the most complicated. Again, let's start with a
# TensorMap with {'a', 'b', 'c'} keys. Suppose we move the 'c' keys
# to properties. We take the final key {a_1, b_1, c_1}. Its
Expand All @@ -219,7 +262,7 @@ def transform(self, frames, calculator):

# save positions of the moved keys in the properties array
pos_in_prop = []
for key in self.moved_keys_names:
for key in self._moved_keys_names:
pos_in_prop.append(self.tensor_map.property_names.index(key))
idx = []
property_names = []
Expand All @@ -228,12 +271,12 @@ def transform(self, frames, calculator):
if i not in pos_in_prop:
property_names.append(key)
# determine the positions of the moved keys in the final keys
for key in self.moved_keys_names:
idx.append(self.final_keys.names.index(key))
for key in self._moved_keys_names:
idx.append(self.transformed_leys.names.index(key))
# in this dictionary we write a list of properties, which we will
# save for each block.
properties_dict = {}
for obj in self.final_keys:
for obj in self.transformed_leys:
obj_tuple = tuple(item for item in obj)
properties_dict[obj_tuple] = []
# running through all the keys of the transformed tensor
Expand All @@ -257,7 +300,7 @@ def transform(self, frames, calculator):
key_ind = 0
# put the key together from the two pieces - the one you
# moved and the one you have left
for i in range(len(self.final_keys.names)):
for i in range(len(self.transformed_leys.names)):
if i in idx:
obt_key.append(add_key[add_key_ind])
add_key_ind += 1
Expand All @@ -269,7 +312,7 @@ def transform(self, frames, calculator):
properties_dict[obt_key].append(property_initial)
blocks = []
# go through the original keys to create a tensor for selection
for key in self.final_keys:
for key in self.transformed_leys:
key = tuple(key)
# In theory, we may find that we have not selected any property
# that corresponds to this block - take this into account.
Expand All @@ -287,10 +330,12 @@ def transform(self, frames, calculator):
properties=properties,
)
)
properties_tensor = TensorMap(self.final_keys, blocks)
descriptor = calculator.compute(
frames,
properties_tensor = TensorMap(self.transformed_leys, blocks)
descriptor = self.calculator.compute(
systems=frames,
gradients=self.calculator_grad,
use_native_system=self.calculator_use_native_system,
selected_properties=properties_tensor,
selected_keys=self.final_keys,
selected_keys=self.transformed_leys,
)
return descriptor
Loading