From 5e33e80f4636a23541212b98aef30ec1e218d3ed Mon Sep 17 00:00:00 2001 From: Luthaf Date: Wed, 15 May 2024 16:21:49 +0200 Subject: [PATCH] Implement selected_atoms --- regtest/metatensor/rt-soap/plumed.dat | 14 ++ .../rt-soap/soap_selected_data.reference | 2 + src/metatensor/metatensor.cpp | 183 +++++++++++++----- 3 files changed, 154 insertions(+), 45 deletions(-) create mode 100644 regtest/metatensor/rt-soap/soap_selected_data.reference diff --git a/regtest/metatensor/rt-soap/plumed.dat b/regtest/metatensor/rt-soap/plumed.dat index 6980d317cd..8f3bf7ae44 100644 --- a/regtest/metatensor/rt-soap/plumed.dat +++ b/regtest/metatensor/rt-soap/plumed.dat @@ -8,9 +8,23 @@ soap: METATENSOR ... SPECIES_TO_TYPES=6,1,8 ... +soap_selected: METATENSOR ... + MODEL=soap_cv.pt + EXTENSIONS_DIRECTORY=extensions + + SPECIES1=1-26 + SPECIES2=27-62 + SPECIES3=63-76 + SPECIES_TO_TYPES=6,1,8 + + # select out of order to make sure this is respected in the output + SELECTED_ATOMS=2,3,1 +... + scalar: SUM ARG=soap PERIODIC=NO BIASVALUE ARG=scalar PRINT ARG=soap FILE=soap_data STRIDE=1 FMT=%8.4f +PRINT ARG=soap_selected FILE=soap_selected_data STRIDE=1 FMT=%8.4f diff --git a/regtest/metatensor/rt-soap/soap_selected_data.reference b/regtest/metatensor/rt-soap/soap_selected_data.reference new file mode 100644 index 0000000000..805a7de6d5 --- /dev/null +++ b/regtest/metatensor/rt-soap/soap_selected_data.reference @@ -0,0 +1,2 @@ +#! FIELDS time soap_selected.1.1 soap_selected.1.2 soap_selected.1.3 soap_selected.2.1 soap_selected.2.2 soap_selected.2.3 soap_selected.3.1 soap_selected.3.2 soap_selected.3.3 + 0.000000 6.0785 6.3903 6.9409 5.2246 4.6212 5.9061 5.3739 5.3189 6.4924 diff --git a/src/metatensor/metatensor.cpp b/src/metatensor/metatensor.cpp index eabef5f503..4ac8b42fde 100644 --- a/src/metatensor/metatensor.cpp +++ b/src/metatensor/metatensor.cpp @@ -35,18 +35,11 @@ directory defines a custom machine learning CV that can be used with PLUMED. \par Examples The following input shows how you can call metatensor and evaluate the model -that is described in the file soap_cv.pt from PLUMED. To evaluate this model -plumed is required to use code that is included in the directory extensions -which has been specified using the `EXTENSIONS_DIRECTORY` flag. Numbered -`SPECIES` labels are used to indicate the list of indices that belong to each -atomic species in the model. The `SPECIES_TO_TYPE` keyword then provides -information on the atom type for each species. The first number here is the -atomic number of the atoms that have been specified using the `SPECIES1` flag, -the second number is the atomic number of the atoms that have been specified -using the `SPECIES2` flag and so on. +that is described in the file `custom_cv.pt` from PLUMED. -\plumedfile soap: METATENSOR ... MODEL=soap_cv.pt -EXTENSIONS_DIRECTORY=extensions +\plumedfile +metatensor_cv: METATENSOR ... + MODEL=custom_cv.pt SPECIES1=1-26 SPECIES2=27-62 @@ -55,6 +48,47 @@ EXTENSIONS_DIRECTORY=extensions ... \endplumedfile +The numbered `SPECIES` labels are used to indicate the list of atoms that belong +to each atomic species in the system. The `SPECIES_TO_TYPE` keyword then +provides information on the atom type for each species. The first number here is +the atomic type of the atoms that have been specified using the `SPECIES1` flag, +the second number is the atomic number of the atoms that have been specified +using the `SPECIES2` flag and so on. + +`METATENSOR` action also accepts the following options: + +- `EXTENSIONS_DIRECTORY` should be the path to a directory containing + TorchScript extensions (as shared libraries) that are required to load and + execute the model. This matches the `collect_extensions` argument to + `MetatensorAtomisticModel.export` in Python. +- `NO_CONSISTENCY_CHECK` can be used to disable internal consistency checks; +- `SELECTED_ATOMS` can be used to signal the metatensor models that it should + only run its calculation for the selected subset of atoms. The model still + need to know about all the atoms in the system (through the `SPECIES` + keyword); but this can be used to reduce the calculation cost. Note that the + indices of the selected atoms should start at 1 in the PLUMED input file, but + they will be translated to start at 0 when given to the model (i.e. in + Python/TorchScript, the `forward` method will receive a `selected_atoms` which + starts at 0) + +Here is another example with all the possible keywords: + +\plumedfile +soap: METATENSOR ... + MODEL=soap.pt + EXTENSION_DIRECTORY=extensions + NO_CONSISTENCY_CHECK + + SPECIES1=1-10 + SPECIES2=11-20 + SPECIES_TO_TYPES=8,13 + + # only run the calculation for the Aluminium (type 13) atoms, but + # include the Oxygen (type 8) as potential neighbors. + SELECTED_ATOMS=11-20 +... +\endplumedfile + \par Collective variables and metatensor models Collective variables are not yet part of the [known outputs][mts_outputs] for @@ -309,8 +343,6 @@ MetatensorPlumedAction::MetatensorPlumedAction(const ActionOptions& options): } this->atomic_types_ = torch::tensor(std::move(atomic_types)); - - // Request the atoms and check we have read in everything this->requestAtoms(all_atoms); bool no_consistency_check = false; @@ -346,10 +378,6 @@ MetatensorPlumedAction::MetatensorPlumedAction(const ActionOptions& options): output->explicit_gradients = {}; evaluations_options_->outputs.insert("plumed::cv", output); - // TODO: selected_atoms - // evaluations_options_->set_selected_atoms() - - // Determine which device we should use based on user input, what the model // supports and what's available auto available_devices = std::vector(); @@ -435,9 +463,8 @@ MetatensorPlumedAction::MetatensorPlumedAction(const ActionOptions& options): auto tensor_options = torch::TensorOptions().dtype(this->dtype_).device(this->device_); this->strain_ = torch::eye(3, tensor_options.requires_grad(true)); - // setup storage for the computed CV: we need to run the model once to know - // the shape of the output, so we use a dummy system with one since atom for - // this + // determine how many properties there will be in the output by running the + // model once on a dummy system auto dummy_system = torch::make_intrusive( /*types = */ torch::zeros({0}, tensor_options.dtype(torch::kInt32)), /*positions = */ torch::zeros({0, 3}, tensor_options), @@ -461,16 +488,52 @@ MetatensorPlumedAction::MetatensorPlumedAction(const ActionOptions& options): dummy_system->add_neighbor_list(request, neighbors); } + this->n_properties_ = static_cast( + this->executeModel(dummy_system)->properties()->count() + ); + + // parse and handle atom sub-selection. This is done AFTER determining the + // output size, since the selection might not be valid for the dummy system + std::vector selected_atoms; + this->parseVector("SELECTED_ATOMS", selected_atoms); + if (!selected_atoms.empty()) { + auto selection_value = torch::zeros( + {static_cast(selected_atoms.size()), 2}, + torch::TensorOptions().dtype(torch::kInt32).device(this->device_) + ); + + for (unsigned i=0; i(this->atomic_types_.size(0)); + if (selected_atoms[i] <= 0 || selected_atoms[i] > n_atoms) { + this->error( + "Values in metatensor's SELECTED_ATOMS should be between 1 " + "and the number of atoms (" + std::to_string(n_atoms) + "), " + "got " + std::to_string(selected_atoms[i])); + } + // PLUMED input uses 1-based indexes, but metatensor wants 0-based + selection_value[i][1] = selected_atoms[i] - 1; + } + + evaluations_options_->set_selected_atoms( + torch::make_intrusive( + std::vector{"system", "atom"}, selection_value + ) + ); + } + + // Now that we now both n_samples and n_properties, we can setup the + // PLUMED-side storage for the computed CV if (output->per_atom) { - this->n_samples_ = static_cast(this->atomic_types_.size(0)); + auto selected_atoms = this->evaluations_options_->get_selected_atoms(); + if (selected_atoms.has_value()) { + this->n_samples_ = static_cast(selected_atoms.value()->count()); + } else { + this->n_samples_ = static_cast(this->atomic_types_.size(0)); + } } else { this->n_samples_ = 1; } - this->n_properties_ = static_cast( - this->executeModel(dummy_system)->properties()->count() - ); - if (n_samples_ == 1 && n_properties_ == 1) { log.printf(" the output of this model is a scalar\n"); @@ -511,6 +574,8 @@ void MetatensorPlumedAction::createSystem() { plumed_merror(oss.str()); } + // this->getTotAtoms() + const auto& cell = this->getPbc().getBox(); auto cpu_f64_tensor = torch::TensorOptions().dtype(torch::kFloat64).device(torch::kCPU); @@ -714,25 +779,39 @@ void MetatensorPlumedAction::calculate() { } } } else { - auto samples = block->samples()->as_metatensor(); - plumed_assert(samples.names().size() == 2); - plumed_assert(samples.names()[0] == std::string("system")); - plumed_assert(samples.names()[1] == std::string("atom")); - - auto& samples_values = samples.values(); + auto samples = block->samples(); + plumed_assert((samples->names() == std::vector{"system", "atom"})); + + auto samples_values = samples->values().to(torch::kCPU); + auto selected_atoms = this->evaluations_options_->get_selected_atoms(); + + // handle the possibility that samples are returned in + // a non-sorted order. + auto get_output_location = [&](unsigned i) { + if (selected_atoms.has_value()) { + // If the users picked some selected atoms, then we store the + // output in the same order as the selection was given + auto sample = samples_values.index({static_cast(i), torch::indexing::Slice()}); + auto position = selected_atoms.value()->position(sample); + plumed_assert(position.has_value()); + return static_cast(position.value()); + } else { + return static_cast(samples_values[i][1].item()); + } + }; if (n_properties_ == 1) { // we have a single CV describing multiple things (i.e. atoms) for (unsigned i=0; i(samples_values(i, 1)); - value->set(atom_i, torch_values[i][0].item()); + auto output_i = get_output_location(i); + value->set(output_i, torch_values[i][0].item()); } } else { // the CV is a matrix for (unsigned i=0; i(samples_values(i, 1)); + auto output_i = get_output_location(i); for (unsigned j=0; jset(atom_i * n_properties_ + j, torch_values[i][j].item()); + value->set(output_i * n_properties_ + j, torch_values[i][j].item()); } } } @@ -759,23 +838,34 @@ void MetatensorPlumedAction::apply() { } } } else { - auto samples = block->samples()->as_metatensor(); - plumed_assert(samples.names().size() == 2); - plumed_assert(samples.names()[0] == std::string("system")); - plumed_assert(samples.names()[1] == std::string("atom")); - - auto& samples_values = samples.values(); + auto samples = block->samples(); + plumed_assert((samples->names() == std::vector{"system", "atom"})); + + auto samples_values = samples->values().to(torch::kCPU); + auto selected_atoms = this->evaluations_options_->get_selected_atoms(); + + // see above for an explanation of why we use this function + auto get_output_location = [&](unsigned i) { + if (selected_atoms.has_value()) { + auto sample = samples_values.index({static_cast(i), torch::indexing::Slice()}); + auto position = selected_atoms.value()->position(sample); + plumed_assert(position.has_value()); + return static_cast(position.value()); + } else { + return static_cast(samples_values[i][1].item()); + } + }; if (n_properties_ == 1) { for (unsigned i=0; i(samples_values(i, 1)); - output_grad[i][0] = value->getForce(atom_i); + auto output_i = get_output_location(i); + output_grad[i][0] = value->getForce(output_i); } } else { for (unsigned i=0; i(samples_values(i, 1)); + auto output_i = get_output_location(i); for (unsigned j=0; jgetForce(atom_i * n_properties_ + j); + output_grad[i][j] = value->getForce(output_i * n_properties_ + j); } } } @@ -842,6 +932,9 @@ namespace PLMD { namespace metatensor { keys.add("numbered", "SPECIES", "the atoms in each PLUMED species"); keys.reset_style("SPECIES", "atoms"); + keys.add("optional", "SELECTED_ATOMS", "subset of atoms that should be used for the calculation"); + keys.reset_style("SELECTED_ATOMS", "atoms"); + keys.add("optional", "SPECIES_TO_TYPES", "mapping from PLUMED SPECIES to metatensor's atomic types"); }