MDAnalysis · tanishy7777 · Jan 8, 2025 · Jan 20, 2025 · Jan 20, 2025 · Jan 20, 2025
diff --git a/package/CHANGELOG b/package/CHANGELOG
@@ -33,6 +33,7 @@ Enhancements
  * Enable parallelization for analysis.nucleicacids.NucPairDist (Issue #4670)
  * Add check and warning for empty (all zero) coordinates in RDKit converter (PR #4824)
  * Added `precision` for XYZWriter (Issue #4775, PR #4771)
+ * Parallelize `analysis.rdf.EinsteinMSD` (Issue #4676, PR #4896)
 
 
 Changes

diff --git a/package/MDAnalysis/analysis/msd.py b/package/MDAnalysis/analysis/msd.py
@@ -242,7 +242,7 @@
 import numpy as np
 import logging
 from ..due import due, Doi
-from .base import AnalysisBase
+from .base import AnalysisBase, ResultsGroup
 from ..core import groups
 from tqdm import tqdm
 
@@ -299,8 +299,22 @@
 
 
     .. versionadded:: 2.0.0
+    .. versionchanged:: 2.9.0
+       Enabled **parallel execution** with the ``multiprocessing`` and ``dask``
+       backends; use the new method :meth:`get_supported_backends` to see all
+       supported backends.
     """
 
+    @classmethod
+    def get_supported_backends(cls):
+        return (
+            "serial",
+            "multiprocessing",
+            "dask",
+        )
+
+    _analysis_algorithm_is_parallelizable = True
+
     def __init__(self, u, select="all", msd_type="xyz", fft=True, **kwargs):
         r"""
         Parameters
@@ -333,6 +347,7 @@
         # local
         self.ag = u.select_atoms(self.select)
         self.n_particles = len(self.ag)
+        self.results._position_array = None
         self._position_array = None
 
         # result
@@ -345,6 +360,9 @@
         self.results.msds_by_particle = np.zeros(
             (self.n_frames, self.n_particles)
         )
+        self.results._position_array = np.zeros(
+            (self.n_frames, self.n_particles, self.dim_fac)
+        )
         self._position_array = np.zeros(
             (self.n_frames, self.n_particles, self.dim_fac)
         )
@@ -381,8 +399,29 @@
         self._position_array[self._frame_index] = self.ag.positions[
             :, self._dim
         ]
+        self.results._position_array[self._frame_index] = self.ag.positions[
+            :, self._dim
+        ]
+
+    # aggregator wants 'timeseries' and 'msds_by_particle' to be passed
+    # so using this as a workaround
+    @staticmethod
+    def f(arrs):
+        pass
+
+    def _get_aggregator(self):
+        return ResultsGroup(
+            lookup={
+                "_position_array": ResultsGroup.ndarray_vstack,
+                "msds_by_particle": self.f,
+                "timeseries": self.f,
+            }
+        )
 
     def _conclude(self):
+        self.results.msds_by_particle = np.zeros(
+            (self.n_frames, self.n_particles)
+        )
         if self.fft:
             self._conclude_fft()
         else:
@@ -391,6 +430,7 @@
     def _conclude_simple(self):
         r"""Calculates the MSD via the simple "windowed" algorithm."""
         lagtimes = np.arange(1, self.n_frames)
+        self._position_array = self.results._position_array
         positions = self._position_array.astype(np.float64)
         for lag in tqdm(lagtimes):
             disp = positions[:-lag, :, :] - positions[lag:, :, :]
@@ -414,7 +454,7 @@
 
                 or set fft=False"""
             )
-
+        self._position_array = self.results._position_array
         positions = self._position_array.astype(np.float64)
         for n in tqdm(range(self.n_particles)):
             self.results.msds_by_particle[:, n] = tidynamics.msd(

diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py
@@ -17,6 +17,7 @@
 from MDAnalysis.analysis.nucleicacids import NucPairDist
 from MDAnalysis.analysis.contacts import Contacts
 from MDAnalysis.analysis.density import DensityAnalysis
+from MDAnalysis.analysis.msd import EinsteinMSD
 from MDAnalysis.lib.util import is_installed
 
 
@@ -176,3 +177,8 @@ def client_Contacts(request):
 @pytest.fixture(scope="module", params=params_for_cls(DensityAnalysis))
 def client_DensityAnalysis(request):
     return request.param
+
+
+@pytest.fixture(scope="module", params=params_for_cls(EinsteinMSD))
+def client_EinsteinMSD(request):
+    return request.param
diff --git a/testsuite/MDAnalysisTests/analysis/test_msd.py b/testsuite/MDAnalysisTests/analysis/test_msd.py
@@ -58,10 +58,10 @@ def random_walk_u():
 
 
 @pytest.fixture(scope="module")
-def msd(u, SELECTION):
+def msd(u, SELECTION, client_EinsteinMSD):
     # non fft msd
     m = MSD(u, SELECTION, msd_type="xyz", fft=False)
-    m.run()
+    m.run(**client_EinsteinMSD)
     return m
 
 
@@ -76,11 +76,11 @@ def step_traj(NSTEP):  # constant velocity
 
 
 @block_import("tidynamics")
-def test_notidynamics(u, SELECTION):
+def test_notidynamics(u, SELECTION, client_EinsteinMSD):
     with pytest.raises(ImportError, match="tidynamics was not found"):
         u = mda.Universe(PSF, DCD)
         msd = MSD(u, SELECTION)
-        msd.run()
+        msd.run(**client_EinsteinMSD)
 
 
 def characteristic_poly(n, d):
@@ -125,12 +125,12 @@ def test_msdtype_error(self, u, SELECTION, msdtype):
         ],
     )
     def test_simple_step_traj_all_dims(
-        self, step_traj, NSTEP, dim, dim_factor
+        self, step_traj, NSTEP, dim, dim_factor, client_EinsteinMSD
     ):
         # testing the "simple" algorithm on constant velocity trajectory
         # should fit the polynomial y=dim_factor*x**2
         m_simple = MSD(step_traj, "all", msd_type=dim, fft=False)
-        m_simple.run()
+        m_simple.run(**client_EinsteinMSD)
         poly = characteristic_poly(NSTEP, dim_factor)
         assert_almost_equal(m_simple.results.timeseries, poly, decimal=4)
 
@@ -159,10 +159,10 @@ def test_simple_start_stop_step_all_dims(
             m_simple.results.timeseries, poly[0:990:10], decimal=4
         )
 
-    def test_random_walk_u_simple(self, random_walk_u):
+    def test_random_walk_u_simple(self, random_walk_u, client_EinsteinMSD):
         # regress against random_walk test data
         msd_rw = MSD(random_walk_u, "all", msd_type="xyz", fft=False)
-        msd_rw.run()
+        msd_rw.run(**client_EinsteinMSD)
         norm = np.linalg.norm(msd_rw.results.timeseries)
         val = 3932.39927487146
         assert_almost_equal(norm, val, decimal=5)
@@ -175,10 +175,10 @@ def test_random_walk_u_simple(self, random_walk_u):
 class TestMSDFFT(object):
 
     @pytest.fixture(scope="class")
-    def msd_fft(self, u, SELECTION):
+    def msd_fft(self, u, SELECTION, client_EinsteinMSD):
         # fft msd
         m = MSD(u, SELECTION, msd_type="xyz", fft=True)
-        m.run()
+        m.run(**client_EinsteinMSD)
         return m
 
     def test_fft_vs_simple_default(self, msd, msd_fft):
@@ -194,25 +194,29 @@ def test_fft_vs_simple_default_per_particle(self, msd, msd_fft):
         assert_almost_equal(per_particle_simple, per_particle_fft, decimal=4)
 
     @pytest.mark.parametrize("dim", ["xyz", "xy", "xz", "yz", "x", "y", "z"])
-    def test_fft_vs_simple_all_dims(self, u, SELECTION, dim):
+    def test_fft_vs_simple_all_dims(
+        self, u, SELECTION, dim, client_EinsteinMSD
+    ):
         # check fft and simple give same result for each dimensionality
         m_simple = MSD(u, SELECTION, msd_type=dim, fft=False)
-        m_simple.run()
+        m_simple.run(**client_EinsteinMSD)
         timeseries_simple = m_simple.results.timeseries
         m_fft = MSD(u, SELECTION, msd_type=dim, fft=True)
-        m_fft.run()
+        m_fft.run(**client_EinsteinMSD)
         timeseries_fft = m_fft.results.timeseries
         assert_almost_equal(timeseries_simple, timeseries_fft, decimal=4)
 
     @pytest.mark.parametrize("dim", ["xyz", "xy", "xz", "yz", "x", "y", "z"])
-    def test_fft_vs_simple_all_dims_per_particle(self, u, SELECTION, dim):
+    def test_fft_vs_simple_all_dims_per_particle(
+        self, u, SELECTION, dim, client_EinsteinMSD
+    ):
         # check fft and simple give same result for each particle in each
         # dimension
         m_simple = MSD(u, SELECTION, msd_type=dim, fft=False)
-        m_simple.run()
+        m_simple.run(**client_EinsteinMSD)
         per_particle_simple = m_simple.results.msds_by_particle
         m_fft = MSD(u, SELECTION, msd_type=dim, fft=True)
-        m_fft.run()
+        m_fft.run(**client_EinsteinMSD)
         per_particle_fft = m_fft.results.msds_by_particle
         assert_almost_equal(per_particle_simple, per_particle_fft, decimal=4)
 
@@ -228,14 +232,16 @@ def test_fft_vs_simple_all_dims_per_particle(self, u, SELECTION, dim):
             ("z", 1),
         ],
     )
-    def test_fft_step_traj_all_dims(self, step_traj, NSTEP, dim, dim_factor):
+    def test_fft_step_traj_all_dims(
+        self, step_traj, NSTEP, dim, dim_factor, client_EinsteinMSD
+    ):
         # testing the fft algorithm on constant velocity trajectory
         # this should fit the polynomial y=dim_factor*x**2
         # fft based tests require a slight decrease in expected prescision
         # primarily due to roundoff in fft(ifft()) calls.
         # relative accuracy expected to be around ~1e-12
         m_simple = MSD(step_traj, "all", msd_type=dim, fft=True)
-        m_simple.run()
+        m_simple.run(**client_EinsteinMSD)
         poly = characteristic_poly(NSTEP, dim_factor)
         # this was relaxed from decimal=4 for numpy=1.13 test
         assert_almost_equal(m_simple.results.timeseries, poly, decimal=3)
@@ -265,10 +271,33 @@ def test_fft_start_stop_step_all_dims(
             m_simple.results.timeseries, poly[0:990:10], decimal=3
         )
 
-    def test_random_walk_u_fft(self, random_walk_u):
+    def test_random_walk_u_fft(self, random_walk_u, client_EinsteinMSD):
         # regress against random_walk test data
         msd_rw = MSD(random_walk_u, "all", msd_type="xyz", fft=True)
-        msd_rw.run()
+        msd_rw.run(**client_EinsteinMSD)
         norm = np.linalg.norm(msd_rw.results.timeseries)
         val = 3932.39927487146
         assert_almost_equal(norm, val, decimal=5)
+
+
+@pytest.mark.parametrize(
+    "classname,is_parallelizable",
+    [
+        (mda.analysis.msd, True),
+    ],
+)
+def test_class_is_parallelizable(classname, is_parallelizable):
+    assert (
+        classname.EinsteinMSD._analysis_algorithm_is_parallelizable
+        == is_parallelizable
+    )
+
+
+@pytest.mark.parametrize(
+    "classname,backends",
+    [
+        (mda.analysis.msd, ("serial", "multiprocessing", "dask")),
+    ],
+)
+def test_supported_backends(classname, backends):
+    assert classname.EinsteinMSD.get_supported_backends() == backends