Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parallelizes MDAnalysis.analysis.msd #4896

Open
wants to merge 5 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ Enhancements
* Enable parallelization for analysis.nucleicacids.NucPairDist (Issue #4670)
* Add check and warning for empty (all zero) coordinates in RDKit converter (PR #4824)
* Added `precision` for XYZWriter (Issue #4775, PR #4771)
* Parallelize `analysis.rdf.EinsteinMSD` (Issue #4676, PR #4896)


Changes
Expand Down
44 changes: 42 additions & 2 deletions package/MDAnalysis/analysis/msd.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@
import numpy as np
import logging
from ..due import due, Doi
from .base import AnalysisBase
from .base import AnalysisBase, ResultsGroup
from ..core import groups
from tqdm import tqdm

Expand Down Expand Up @@ -299,8 +299,22 @@


.. versionadded:: 2.0.0
.. versionchanged:: 2.9.0
Enabled **parallel execution** with the ``multiprocessing`` and ``dask``
backends; use the new method :meth:`get_supported_backends` to see all
supported backends.
"""

@classmethod
def get_supported_backends(cls):
return (
"serial",
"multiprocessing",
"dask",
)

_analysis_algorithm_is_parallelizable = True

def __init__(self, u, select="all", msd_type="xyz", fft=True, **kwargs):
r"""
Parameters
Expand Down Expand Up @@ -333,6 +347,7 @@
# local
self.ag = u.select_atoms(self.select)
self.n_particles = len(self.ag)
self.results._position_array = None
self._position_array = None

# result
Expand All @@ -345,6 +360,9 @@
self.results.msds_by_particle = np.zeros(
(self.n_frames, self.n_particles)
)
self.results._position_array = np.zeros(
(self.n_frames, self.n_particles, self.dim_fac)
)
self._position_array = np.zeros(
(self.n_frames, self.n_particles, self.dim_fac)
)
Expand Down Expand Up @@ -381,8 +399,29 @@
self._position_array[self._frame_index] = self.ag.positions[
:, self._dim
]
self.results._position_array[self._frame_index] = self.ag.positions[
:, self._dim
]

# aggregator wants 'timeseries' and 'msds_by_particle' to be passed
# so using this as a workaround
@staticmethod
def f(arrs):
pass

def _get_aggregator(self):
return ResultsGroup(
lookup={
"_position_array": ResultsGroup.ndarray_vstack,
"msds_by_particle": self.f,
"timeseries": self.f,
}
)

def _conclude(self):
self.results.msds_by_particle = np.zeros(
(self.n_frames, self.n_particles)
)
if self.fft:
self._conclude_fft()
else:
Expand All @@ -391,6 +430,7 @@
def _conclude_simple(self):
r"""Calculates the MSD via the simple "windowed" algorithm."""
lagtimes = np.arange(1, self.n_frames)
self._position_array = self.results._position_array
positions = self._position_array.astype(np.float64)
for lag in tqdm(lagtimes):
disp = positions[:-lag, :, :] - positions[lag:, :, :]
Expand All @@ -414,7 +454,7 @@

or set fft=False"""
)

self._position_array = self.results._position_array

Check warning on line 457 in package/MDAnalysis/analysis/msd.py

View check run for this annotation

Codecov / codecov/patch

package/MDAnalysis/analysis/msd.py#L457

Added line #L457 was not covered by tests
positions = self._position_array.astype(np.float64)
for n in tqdm(range(self.n_particles)):
self.results.msds_by_particle[:, n] = tidynamics.msd(
Expand Down
6 changes: 6 additions & 0 deletions testsuite/MDAnalysisTests/analysis/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from MDAnalysis.analysis.nucleicacids import NucPairDist
from MDAnalysis.analysis.contacts import Contacts
from MDAnalysis.analysis.density import DensityAnalysis
from MDAnalysis.analysis.msd import EinsteinMSD
from MDAnalysis.lib.util import is_installed


Expand Down Expand Up @@ -176,3 +177,8 @@ def client_Contacts(request):
@pytest.fixture(scope="module", params=params_for_cls(DensityAnalysis))
def client_DensityAnalysis(request):
return request.param


@pytest.fixture(scope="module", params=params_for_cls(EinsteinMSD))
def client_EinsteinMSD(request):
return request.param
69 changes: 49 additions & 20 deletions testsuite/MDAnalysisTests/analysis/test_msd.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ def random_walk_u():


@pytest.fixture(scope="module")
def msd(u, SELECTION):
def msd(u, SELECTION, client_EinsteinMSD):
# non fft msd
m = MSD(u, SELECTION, msd_type="xyz", fft=False)
m.run()
m.run(**client_EinsteinMSD)
return m


Expand All @@ -76,11 +76,11 @@ def step_traj(NSTEP): # constant velocity


@block_import("tidynamics")
def test_notidynamics(u, SELECTION):
def test_notidynamics(u, SELECTION, client_EinsteinMSD):
with pytest.raises(ImportError, match="tidynamics was not found"):
u = mda.Universe(PSF, DCD)
msd = MSD(u, SELECTION)
msd.run()
msd.run(**client_EinsteinMSD)


def characteristic_poly(n, d):
Expand Down Expand Up @@ -125,12 +125,12 @@ def test_msdtype_error(self, u, SELECTION, msdtype):
],
)
def test_simple_step_traj_all_dims(
self, step_traj, NSTEP, dim, dim_factor
self, step_traj, NSTEP, dim, dim_factor, client_EinsteinMSD
):
# testing the "simple" algorithm on constant velocity trajectory
# should fit the polynomial y=dim_factor*x**2
m_simple = MSD(step_traj, "all", msd_type=dim, fft=False)
m_simple.run()
m_simple.run(**client_EinsteinMSD)
poly = characteristic_poly(NSTEP, dim_factor)
assert_almost_equal(m_simple.results.timeseries, poly, decimal=4)

Expand Down Expand Up @@ -159,10 +159,10 @@ def test_simple_start_stop_step_all_dims(
m_simple.results.timeseries, poly[0:990:10], decimal=4
)

def test_random_walk_u_simple(self, random_walk_u):
def test_random_walk_u_simple(self, random_walk_u, client_EinsteinMSD):
# regress against random_walk test data
msd_rw = MSD(random_walk_u, "all", msd_type="xyz", fft=False)
msd_rw.run()
msd_rw.run(**client_EinsteinMSD)
norm = np.linalg.norm(msd_rw.results.timeseries)
val = 3932.39927487146
assert_almost_equal(norm, val, decimal=5)
Expand All @@ -175,10 +175,10 @@ def test_random_walk_u_simple(self, random_walk_u):
class TestMSDFFT(object):

@pytest.fixture(scope="class")
def msd_fft(self, u, SELECTION):
def msd_fft(self, u, SELECTION, client_EinsteinMSD):
# fft msd
m = MSD(u, SELECTION, msd_type="xyz", fft=True)
m.run()
m.run(**client_EinsteinMSD)
return m

def test_fft_vs_simple_default(self, msd, msd_fft):
Expand All @@ -194,25 +194,29 @@ def test_fft_vs_simple_default_per_particle(self, msd, msd_fft):
assert_almost_equal(per_particle_simple, per_particle_fft, decimal=4)

@pytest.mark.parametrize("dim", ["xyz", "xy", "xz", "yz", "x", "y", "z"])
def test_fft_vs_simple_all_dims(self, u, SELECTION, dim):
def test_fft_vs_simple_all_dims(
self, u, SELECTION, dim, client_EinsteinMSD
):
# check fft and simple give same result for each dimensionality
m_simple = MSD(u, SELECTION, msd_type=dim, fft=False)
m_simple.run()
m_simple.run(**client_EinsteinMSD)
timeseries_simple = m_simple.results.timeseries
m_fft = MSD(u, SELECTION, msd_type=dim, fft=True)
m_fft.run()
m_fft.run(**client_EinsteinMSD)
timeseries_fft = m_fft.results.timeseries
assert_almost_equal(timeseries_simple, timeseries_fft, decimal=4)

@pytest.mark.parametrize("dim", ["xyz", "xy", "xz", "yz", "x", "y", "z"])
def test_fft_vs_simple_all_dims_per_particle(self, u, SELECTION, dim):
def test_fft_vs_simple_all_dims_per_particle(
self, u, SELECTION, dim, client_EinsteinMSD
):
# check fft and simple give same result for each particle in each
# dimension
m_simple = MSD(u, SELECTION, msd_type=dim, fft=False)
m_simple.run()
m_simple.run(**client_EinsteinMSD)
per_particle_simple = m_simple.results.msds_by_particle
m_fft = MSD(u, SELECTION, msd_type=dim, fft=True)
m_fft.run()
m_fft.run(**client_EinsteinMSD)
per_particle_fft = m_fft.results.msds_by_particle
assert_almost_equal(per_particle_simple, per_particle_fft, decimal=4)

Expand All @@ -228,14 +232,16 @@ def test_fft_vs_simple_all_dims_per_particle(self, u, SELECTION, dim):
("z", 1),
],
)
def test_fft_step_traj_all_dims(self, step_traj, NSTEP, dim, dim_factor):
def test_fft_step_traj_all_dims(
self, step_traj, NSTEP, dim, dim_factor, client_EinsteinMSD
):
# testing the fft algorithm on constant velocity trajectory
# this should fit the polynomial y=dim_factor*x**2
# fft based tests require a slight decrease in expected prescision
# primarily due to roundoff in fft(ifft()) calls.
# relative accuracy expected to be around ~1e-12
m_simple = MSD(step_traj, "all", msd_type=dim, fft=True)
m_simple.run()
m_simple.run(**client_EinsteinMSD)
poly = characteristic_poly(NSTEP, dim_factor)
# this was relaxed from decimal=4 for numpy=1.13 test
assert_almost_equal(m_simple.results.timeseries, poly, decimal=3)
Expand Down Expand Up @@ -265,10 +271,33 @@ def test_fft_start_stop_step_all_dims(
m_simple.results.timeseries, poly[0:990:10], decimal=3
)

def test_random_walk_u_fft(self, random_walk_u):
def test_random_walk_u_fft(self, random_walk_u, client_EinsteinMSD):
# regress against random_walk test data
msd_rw = MSD(random_walk_u, "all", msd_type="xyz", fft=True)
msd_rw.run()
msd_rw.run(**client_EinsteinMSD)
norm = np.linalg.norm(msd_rw.results.timeseries)
val = 3932.39927487146
assert_almost_equal(norm, val, decimal=5)


@pytest.mark.parametrize(
"classname,is_parallelizable",
[
(mda.analysis.msd, True),
],
)
def test_class_is_parallelizable(classname, is_parallelizable):
assert (
classname.EinsteinMSD._analysis_algorithm_is_parallelizable
== is_parallelizable
)


@pytest.mark.parametrize(
"classname,backends",
[
(mda.analysis.msd, ("serial", "multiprocessing", "dask")),
],
)
def test_supported_backends(classname, backends):
assert classname.EinsteinMSD.get_supported_backends() == backends
Loading