From 721e320733ce111c531a908f5581c24a3b542018 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Mon, 25 Mar 2024 18:45:54 +0800
Subject: [PATCH 01/41] Enhanced the code coverage for the function set_params

---
 ensemble_md/replica_exchange_EE.py            | 25 +++--
 ensemble_md/tests/test_replica_exchange_EE.py | 92 +++++++++++++++++--
 2 files changed, 103 insertions(+), 14 deletions(-)

diff --git a/ensemble_md/replica_exchange_EE.py b/ensemble_md/replica_exchange_EE.py
index 572b2004..6e321382 100644
--- a/ensemble_md/replica_exchange_EE.py
+++ b/ensemble_md/replica_exchange_EE.py
@@ -268,12 +268,21 @@ def set_params(self, analysis):
                         raise ParameterError("Each number specified in 'add_swappables' should be a non-negative integer.")  # noqa: E501
 
         if self.mdp_args is not None:
+            # Note that mdp_args is a dictionary including MDP parameters DIFFERING across replicas.
+            # The value of each key should be a list of length n_sim.
+            for val in self.mdp_args.values():
+                if not isinstance(val, list):
+                    raise ParameterError("The values specified in 'mdp_args' should be lists.")
+
+                if len(set(val)) == 1:
+                    raise ParameterError("MDP parameters set by 'mdp_args' should differ across at least two replicas.")  # noqa: E501
+
             for key in self.mdp_args.keys():
                 if not isinstance(key, str):
                     raise ParameterError("All keys specified in 'mdp_args' should be strings.")
                 else:
                     if '-' in key:
-                        raise ParameterError("Parameters specified in 'mdp_args' must use underscores in place of hyphens.")  # noqa: E501
+                        raise ParameterError("ensemble_md convention: Parameters specified in 'mdp_args' must use underscores in place of hyphens.")  # noqa: E501
             for val_list in self.mdp_args.values():
                 if len(val_list) != self.n_sim:
                     raise ParameterError("The number of values specified for each key in 'mdp_args' should be the same as the number of replicas.")  # noqa: E501
@@ -318,7 +327,7 @@ def set_params(self, analysis):
             self.warnings.append('Warning: We recommend setting gen_seed as -1 so the random seed is different for each iteration.')  # noqa: E501
 
         if 'gen_vel' not in self.template or ('gen_vel' in self.template and self.template['gen_vel'] == 'no'):
-            self.warnings.append('Warning: We recommend generating new velocities for each iteration to avoid potential issues with detailed balance.')  # noqa: E501
+            self.warnings.append('Warning: We recommend generating new velocities for each iteration to avoid potential issues with the detailed balance.')  # noqa: E501
 
         if self.nst_sim % self.template['nstlog'] != 0:
             raise ParameterError(
@@ -330,16 +339,18 @@ def set_params(self, analysis):
 
         if self.template['nstexpanded'] % self.template['nstdhdl'] != 0:
             raise ParameterError(
-                'In REXEE, the parameter "nstdhdl" must be a factor of the parameter "nstexpanded", or the calculation of acceptance ratios might be wrong.')  # noqa: E501
+                'In REXEE, the parameter "nstdhdl" must be a factor of the parameter "nstexpanded", or the calculation of acceptance ratios may be wrong.')  # noqa: E501
 
         if self.mdp_args is not None:
-            if 'lmc_seed' in self.mdp_args and -1 not in self.mdp_args['lmc_seed']:
+            # Varying the following parameters may not make sense, but here we just avoid edge cases.
+            # We check these parameters as they could directly influence the correctness of the simulation.
+            if 'lmc_seed' in self.mdp_args and self.mdp_args['lmc_seed'] != [-1] * self.n_sim:
                 self.warnings.append('Warning: We recommend setting lmc_seed as -1 so the random seed is different for each iteration.')  # noqa: E501
 
-            if 'gen_seed' in self.mdp_args and -1 not in self.mdp_args['gen_seed']:
+            if 'gen_seed' in self.mdp_args and self.mdp_args['gen_seed'] != [-1] * self.n_sim:
                 self.warnings.append('Warning: We recommend setting gen_seed as -1 so the random seed is different for each iteration.')  # noqa: E501
 
-            if 'gen_vel' in self.mdp_args and 'no' in self.mdp_args['gen_vel']:
+            if 'gen_vel' in self.mdp_args and self.mdp_args['gen_vel'] != ['yes'] * self.n_sim:
                 self.warnings.append('Warning: We recommend generating new velocities for each iteration to avoid potential issues with the detailed balance.')  # noqa: E501
 
             if 'nstlog' in self.mdp_args and sum(self.nst_sim % np.array(self.mdp_args['nstlog'])) != 0:
@@ -352,7 +363,7 @@ def set_params(self, analysis):
 
             if 'nstexpanded' in self.mdp_args and 'nstdhdl' in self.mdp_args and sum(np.array(self.mdp_args['nstexpanded']) % np.array(self.mdp_args['nstdhdl'])) != 0:  # noqa: E501
                 raise ParameterError(
-                    'In REXEE, the parameter "nstdhdl" must be a factor of the parameter "nstexpanded", or the calculation of acceptance ratios might be wrong.')  # noqa: E501
+                    'In REXEE, the parameter "nstdhdl" must be a factor of the parameter "nstexpanded", or the calculation of acceptance ratios may be wrong.')  # noqa: E501
 
         if 'pull' in self.template and self.template['pull'] == 'yes':
             pull_ncoords = self.template['pull_ncoords']
diff --git a/ensemble_md/tests/test_replica_exchange_EE.py b/ensemble_md/tests/test_replica_exchange_EE.py
index fa31ff64..60c38e14 100644
--- a/ensemble_md/tests/test_replica_exchange_EE.py
+++ b/ensemble_md/tests/test_replica_exchange_EE.py
@@ -104,15 +104,31 @@ def test_set_params_error(self, params_dict):
         # 7. Boolean parameters
         check_param_error(params_dict, 'msm', "The parameter 'msm' should be a boolean variable.", 3, False)
 
-        # 8. nstlog > nst_sim
+        # 8. Errors related to nstlog and nstdhdl
         mdp = gmx_parser.MDP(os.path.join(input_path, "expanded.mdp"))  # A perfect mdp file
+
+        # 8-1. nstlog is not a factor of nst_sim
         mdp['nstlog'] = 200
         mdp.write(os.path.join(input_path, "expanded_test.mdp"))
         params_dict['mdp'] = 'ensemble_md/tests/data/expanded_test.mdp'
         params_dict['nst_sim'] = 100
         with pytest.raises(ParameterError, match='The parameter "nstlog" must be a factor of the parameter "nst_sim" specified in the YAML file.'):  # noqa: E501
             get_REXEE_instance(params_dict)
-        params_dict['nst_sim'] = 500
+
+        # 8-2. nstdhdl is not a factor of nst_sim
+        mdp['nstlog'] = 100
+        mdp['nstdhdl'] = 200
+        mdp.write(os.path.join(input_path, "expanded_test.mdp"))
+        with pytest.raises(ParameterError, match='The parameter "nstdhdl" must be a factor of the parameter "nst_sim" specified in the YAML file.'):  # noqa: E501
+            get_REXEE_instance(params_dict)
+
+        # 8-3. nstdhdl is not a factor of nstexpanded
+        mdp['nstdhdl'] = 100
+        mdp['nstexpanded'] = 50
+        mdp.write(os.path.join(input_path, "expanded_test.mdp"))
+        with pytest.raises(ParameterError, match='In REXEE, the parameter "nstdhdl" must be a factor of the parameter "nstexpanded", or the calculation of acceptance ratios may be wrong.'):  # noqa: E501
+            get_REXEE_instance(params_dict)
+
         os.remove(os.path.join(input_path, "expanded_test.mdp"))
 
         # 9. n_sub < 1
@@ -121,7 +137,55 @@ def test_set_params_error(self, params_dict):
         # Note that the parentheses are special characters that need to be escaped in regular expressions
         with pytest.raises(ParameterError, match=r'There must be at least two states for each replica \(current value: -6\). The current specified configuration \(n_tot=9, n_sim=4, s=5\) does not work for REXEE.'):  # noqa: E501
             get_REXEE_instance(params_dict)
-        params_dict['s'] = 1
+
+        # 10. s < 0
+        params_dict['s'] = -1
+        with pytest.raises(ParameterError, match="The parameter 's' should be non-negative."):
+            get_REXEE_instance(params_dict)
+        params_dict['s'] = 1  # set back to a normal value
+
+        # 11. Cases for MT-REXEE (relevant parameters: add_swappables, modify_coords, etc.)
+        params_dict['gro'] = ['ensemble_md/tests/data/sys.gro']
+        with pytest.raises(ParameterError, match="The number of the input GRO files must be the same as the number of replicas, if multiple are specified."):  # noqa: E501
+            get_REXEE_instance(params_dict)
+        params_dict['gro'] = 'ensemble_md/tests/data/sys.gro'  # set back to a normal value
+
+        # 12. Test mdp_args
+        params_dict['mdp_args'] = 3
+        with pytest.raises(ParameterError, match="The parameter 'mdp_args' should be a dictionary."):
+            get_REXEE_instance(params_dict)
+
+        params_dict['mdp_args'] = {'ref_p': 1.0}
+        with pytest.raises(ParameterError, match="The values specified in 'mdp_args' should be lists."):
+            get_REXEE_instance(params_dict)
+
+        params_dict['mdp_args'] = {'ref_p': [1.0, 1.0, 1.0, 1.0]}
+        with pytest.raises(ParameterError, match="MDP parameters set by 'mdp_args' should differ across at least two replicas."):  # noqa: E501
+            get_REXEE_instance(params_dict)
+
+        params_dict['mdp_args'] = {5: [1, 1, 1, 1.01]}  # a non-string key
+        with pytest.raises(ParameterError, match="All keys specified in 'mdp_args' should be strings."):
+            get_REXEE_instance(params_dict)
+
+        params_dict['mdp_args'] = {'ref-p': [1.0, 1.01, 1.02, 1.03]}
+        with pytest.raises(ParameterError, match="ensemble_md convention: Parameters specified in 'mdp_args' must use underscores in place of hyphens."):  # noqa: E501
+            get_REXEE_instance(params_dict)
+
+        params_dict['mdp_args'] = {'ref_p': [1.0, 1.01, 1.02]}
+        with pytest.raises(ParameterError, match="The number of values specified for each key in 'mdp_args' should be the same as the number of replicas."):  # noqa: E501
+            get_REXEE_instance(params_dict)
+
+        params_dict['mdp_args'] = {'nstlog': [200, 100, 100, 100]}
+        with pytest.raises(ParameterError, match='The parameter "nstlog" must be a factor of the parameter "nst_sim" specified in the YAML file.'):  # noqa: E501
+            get_REXEE_instance(params_dict)
+
+        params_dict['mdp_args'] = {'nstdhdl': [200, 100, 100, 100]}
+        with pytest.raises(ParameterError, match='The parameter "nstdhdl" must be a factor of the parameter "nst_sim" specified in the YAML file.'):  # noqa: E501
+            get_REXEE_instance(params_dict)
+
+        params_dict['mdp_args'] = {'nstdhdl': [20, 10, 10, 10], 'nstexpanded': [10, 50, 10, 10]}
+        with pytest.raises(ParameterError, match='In REXEE, the parameter "nstdhdl" must be a factor of the parameter "nstexpanded", or the calculation of acceptance ratios may be wrong.'):  # noqa: E501
+            get_REXEE_instance(params_dict)
 
     def test_set_params_warnings(self, params_dict):
         # 1. Non-recognizable parameter in the YAML file
@@ -135,21 +199,35 @@ def test_set_params_warnings(self, params_dict):
         mdp['lmc_seed'] = 1000
         mdp['gen_seed'] = 1000
         mdp['wl_scale'] = ''
+        mdp['gen_vel'] = 'no'
         mdp.write(os.path.join(input_path, "expanded_test.mdp"))
 
         params_dict['mdp'] = 'ensemble_md/tests/data/expanded_test.mdp'
         params_dict['N_cutoff'] = 1000
-        REXEE = get_REXEE_instance(params_dict)
+        REXEE_1 = get_REXEE_instance(params_dict)
 
         warning_1 = 'Warning: The weight correction/weight combination method is specified but will not be used since the weights are fixed.'  # noqa: E501
         warning_2 = 'Warning: We recommend setting lmc_seed as -1 so the random seed is different for each iteration.'
         warning_3 = 'Warning: We recommend setting gen_seed as -1 so the random seed is different for each iteration.'
-        assert warning_1 in REXEE.warnings
-        assert warning_2 in REXEE.warnings
-        assert warning_3 in REXEE.warnings
+        warning_4 = 'Warning: We recommend generating new velocities for each iteration to avoid potential issues with the detailed balance.'  # noqa: E501
+
+        assert warning_1 in REXEE_1.warnings
+        assert warning_2 in REXEE_1.warnings
+        assert warning_3 in REXEE_1.warnings
+        assert warning_4 in REXEE_1.warnings
 
         os.remove(os.path.join(input_path, "expanded_test.mdp"))
 
+        # 2. Warnings related to the mdp file (for cases where mdp_args is not None)
+        mdp = gmx_parser.MDP(os.path.join(input_path, "expanded.mdp"))  # A perfect mdp file
+        mdp.write(os.path.join(input_path, "expanded_test.mdp"))
+
+        params_dict['mdp_args'] = {'lmc_seed': [-1, -1, -1, 0], 'gen_seed': [-1, -1, -1, 0], 'gen_vel': ['no', 'no', 'no', 'yes']}  # noqa: E501
+        REXEE_2 = get_REXEE_instance(params_dict)
+        assert warning_2 in REXEE_2.warnings
+        assert warning_3 in REXEE_2.warnings
+        assert warning_4 in REXEE_2.warnings
+
     def test_set_params(self, params_dict):
         # 0. Get an REXEE instance to test
         REXEE = get_REXEE_instance(params_dict)

From 206fc1155e87fc71822d2ea24dae72760b722d43 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Mon, 25 Mar 2024 19:20:40 +0800
Subject: [PATCH 02/41] Added test relevant to the MDP parameter 'pull'.

---
 ensemble_md/replica_exchange_EE.py            |   4 +-
 ensemble_md/tests/data/expanded_pull.mdp      | 122 ++++++++++++++++++
 ensemble_md/tests/test_replica_exchange_EE.py |  25 ++++
 3 files changed, 148 insertions(+), 3 deletions(-)
 create mode 100644 ensemble_md/tests/data/expanded_pull.mdp

diff --git a/ensemble_md/replica_exchange_EE.py b/ensemble_md/replica_exchange_EE.py
index 6e321382..31275543 100644
--- a/ensemble_md/replica_exchange_EE.py
+++ b/ensemble_md/replica_exchange_EE.py
@@ -372,9 +372,7 @@ def set_params(self, analysis):
                 if self.template[f'pull_coord{i+1}_geometry'] == 'distance':
                     if self.template[f'pull_coord{i+1}_start'] == 'yes':
                         self.set_ref_dist.append(True)  # starting from the second iteration, set pull_coord*_init.
-                        if 'pull_nstxout' not in self.template:
-                            self.warnings.append('A non-zero value should be specified for pull_nstxout if pull_coord*_start is set to yes.')  # noqa: E501
-                        if self.template['pull_nstxout'] == 0:
+                        if 'pull_nstxout' not in self.template or self.template['pull_nstxout'] == 0:
                             self.warnings.append('A non-zero value should be specified for pull_nstxout if pull_coord*_start is set to yes.')  # noqa: E501
                     else:
                         self.set_ref_dist.append(False)  # Here we assume that the user know what reference distance to use.  # noqa: E501
diff --git a/ensemble_md/tests/data/expanded_pull.mdp b/ensemble_md/tests/data/expanded_pull.mdp
new file mode 100644
index 00000000..baa4e137
--- /dev/null
+++ b/ensemble_md/tests/data/expanded_pull.mdp
@@ -0,0 +1,122 @@
+; Run control
+integrator = md-vv
+tinit = 0
+dt = 0.002
+nsteps = 100000000
+nstcomm = 10
+
+; Output control
+nstlog = 1000
+nstcalcenergy = 10
+nstenergy = 1000
+nstxout_compressed = 1000
+
+; Neighborsearching and short-range nonbonded interactions
+nstlist = 10
+ns_type = grid
+pbc = xyz
+rlist = 1.3
+
+; Electrostatics
+coulombtype = PME-switch
+rcoulomb_switch = 0.88
+rcoulomb = 0.9
+
+; van der Waals
+vdw_type = switch
+rvdw_switch = 0.85
+rvdw = 0.9
+
+; Apply long range dispersion corrections for Energy and Pressure
+DispCorr = AllEnerPres
+
+; Spacing for the PME/PPPM FFT grid
+fourierspacing = 0.12
+
+; EWALD/PME/PPPM parameters
+pme_order = 4
+ewald_rtol = 1e-05
+ewald_geometry = 3d
+epsilon_surface = 0
+optimize_fft = yes
+
+; Temperature coupling
+tcoupl = nose-hoover
+nsttcouple = 10
+tc_grps = System
+tau_t = 1.0
+ref_t = 300
+
+; Pressure coupling is on for NPT
+pcoupl = no
+
+gen_vel = yes
+gen_temp = 300
+gen_seed = -1
+
+; options for bonds
+constraints = h-bonds
+
+; Type of constraint algorithm
+constraint_algorithm = shake
+shake_tol = 1e-05
+
+; Free energy calculation
+free_energy = expanded
+calc_lambda_neighbors = -1
+sc_alpha = 0.5
+sc_power = 1
+sc_sigma = 0.5
+couple_moltype = MOL
+couple_lambda0 = vdw-q
+couple_lambda1 = none
+couple_intramol = no
+init_lambda_state = 0
+nstdhdl = 100
+dhdl_print_energy = total
+
+; Seed for Monte Carlo in lambda space
+symmetrized_transition_matrix = no
+nst_transition_matrix = 100000
+; wl-scale                 = 0.8
+; wl-ratio                 = 0.7
+; init-wl-delta            = 10
+
+; expanded ensemble variables
+nstexpanded = 100
+lmc_stats = no
+lmc_move = metropolized-gibbs
+; lmc-weights-equil       = wl-delta
+; weight-equil-wl-delta   = 0.001
+; wl-oneovert             = yes
+
+; lambda-states          = 1      2      3      4      5      6      7      8      9      10     11     12     13     14     15     16     17     18     19     20     21     22     23     24     25     26     27     28     29     30     31     32     33     34     35     36     37     38     39     40
+
+coul_lambdas = 0.0 0.05 0.1 0.15 0.2 0.25 0.3 0.35 0.4 0.45 0.5 0.55 0.6 0.65 0.7 0.75 0.8 0.85 0.9 0.95 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
+vdw_lambdas = 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.1 0.2 0.3 0.4 0.45 0.5 0.55 0.6 0.63 0.66 0.69 0.72 0.75 0.78 0.81 0.84 0.88 0.92 1.0
+
+; PULL CODE
+pull = yes
+pull_ngroups = 2
+pull_ncoords = 1
+pull_group1_name = HOS
+pull_group2_name = MOL
+pull_pbc_ref_prev_step_com = yes
+
+pull_coord1_groups = 1 2
+pull_coord1_type = umbrella
+pull_coord1_geometry = distance
+pull_coord1_dim = Y Y Y
+pull_coord1_origin = 0.0 0.0 0.0
+pull_coord1_vec = 0.0 0.0 0.0
+pull_coord1_start = yes
+pull_coord1_init = 0
+pull_coord1_rate = 0
+pull_coord1_k = 0
+pull_coord1_kB = 1000
+pull_nstfout = 400000
+pull_nstxout = 1000
+pull_print_ref_value = yes
+
+restraint_lambdas = 0.0 0.0 0.0 0.0 0.0 0.0 0.01 0.05 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 0.95 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
+init_lambda_weights = 0.0 57.88597 112.71883 163.84425 210.48097 253.80261 294.79849 333.90408 370.82669 406.02515 438.53116 468.53751 496.24649 521.58417 544.57404 565.26697 583.7337 599.60651 613.43958 624.70471 633.95947 638.29785 642.44977 646.33551 649.91626 651.54779 652.93359 654.13263 654.94073 655.13086 655.07239 654.66443 653.68683 652.32123 650.72308 649.2381 647.94586 646.599 645.52063 643.99133
diff --git a/ensemble_md/tests/test_replica_exchange_EE.py b/ensemble_md/tests/test_replica_exchange_EE.py
index 60c38e14..9aaa927d 100644
--- a/ensemble_md/tests/test_replica_exchange_EE.py
+++ b/ensemble_md/tests/test_replica_exchange_EE.py
@@ -186,6 +186,31 @@ def test_set_params_error(self, params_dict):
         params_dict['mdp_args'] = {'nstdhdl': [20, 10, 10, 10], 'nstexpanded': [10, 50, 10, 10]}
         with pytest.raises(ParameterError, match='In REXEE, the parameter "nstdhdl" must be a factor of the parameter "nstexpanded", or the calculation of acceptance ratios may be wrong.'):  # noqa: E501
             get_REXEE_instance(params_dict)
+        params_dict['mdp_args'] = None  # set back to a normal value
+
+        # 13. Test the parameter 'pull'
+        params_dict['nst_sim'] = 2000
+        mdp = gmx_parser.MDP(os.path.join(input_path, "expanded_pull.mdp"))
+        params_dict['mdp'] = 'ensemble_md/tests/data/expanded_test.mdp'
+
+        mdp['pull_coord1_geometry'] = 'direction'
+        mdp.write(params_dict['mdp'])
+        get_REXEE_instance(params_dict)
+
+        mdp['pull_coord1_geometry'] = 'distance'  # set back to the original value
+        mdp['pull_coord1_start'] = 'no'
+        mdp.write(params_dict['mdp'])
+        get_REXEE_instance(params_dict)
+
+        mdp['pull_coord1_start'] = 'yes'
+        mdp.write(params_dict['mdp'])
+        mdp['pull_nstxout'] = 0
+        mdp.write(params_dict['mdp'])
+        REXEE_pull = get_REXEE_instance(params_dict)
+        warning = 'A non-zero value should be specified for pull_nstxout if pull_coord*_start is set to yes.'
+        assert warning in REXEE_pull.warnings
+
+        os.remove(os.path.join(input_path, "expanded_test.mdp"))
 
     def test_set_params_warnings(self, params_dict):
         # 1. Non-recognizable parameter in the YAML file

From 06070b5c0aaf4bd26755331fcbad74ac63d1b72c Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Mon, 25 Mar 2024 23:35:14 +0800
Subject: [PATCH 03/41] Enhanced the code coverage of replica_exchange_EE.py

---
 ensemble_md/cli/run_REXEE.py                  |  3 +-
 ensemble_md/replica_exchange_EE.py            | 15 +++-
 ensemble_md/tests/data/pullx.xvg              | 28 +++++++
 ensemble_md/tests/test_replica_exchange_EE.py | 84 ++++++++++++++++++-
 4 files changed, 123 insertions(+), 7 deletions(-)
 create mode 100644 ensemble_md/tests/data/pullx.xvg

diff --git a/ensemble_md/cli/run_REXEE.py b/ensemble_md/cli/run_REXEE.py
index 2dfc2193..dc7b18aa 100644
--- a/ensemble_md/cli/run_REXEE.py
+++ b/ensemble_md/cli/run_REXEE.py
@@ -132,7 +132,8 @@ def main():
         start_idx = comm.bcast(start_idx, root=0)  # so that all the ranks are aware of start_idx
 
     # 2-3. Get the reference distance for the distance restraint specified in the pull code, if any.
-    REXEE.get_ref_dist()
+    pullx_file = 'sim_0/iteration_0/pullx.xvg'
+    REXEE.get_ref_dist(pullx_file)
 
     for i in range(start_idx, REXEE.n_iter):
         # For a large code block like below executed on rank 0, we try to catch any exception and abort the simulation.
diff --git a/ensemble_md/replica_exchange_EE.py b/ensemble_md/replica_exchange_EE.py
index 31275543..3612500b 100644
--- a/ensemble_md/replica_exchange_EE.py
+++ b/ensemble_md/replica_exchange_EE.py
@@ -459,7 +459,7 @@ def check_gmx_executable(self):
                     self.gmx_version = line.split()[-1]
                     break
         except subprocess.CalledProcessError:
-            print(f"{self.gmx_executable} is not available on this system.")
+            print(f"{self.gmx_executable} is not available.")
         except Exception as e:
             print(f"An error occurred:\n{e}")
 
@@ -592,15 +592,21 @@ def initialize_MDP(self, idx):
 
         return MDP
 
-    def get_ref_dist(self):
+    def get_ref_dist(self, pullx_file = 'sim_0/iteration_0/pullx.xvg'):
         """
         Gets the reference distance(s) to use starting from the second iteration if distance restraint(s) are used.
         Specifically, a reference distance determined here is the initial COM distance between the pull groups
         in the input GRO file. This function initializes the attribute :code:`ref_dist`.
+        
+        Parameters
+        ----------
+        pullx_file : str
+            The path to the pullx file whose initial value will be used as the reference distance.
+            Usually, this should be the path of the pullx file of the first iteration. The default
+            is :code:`sim_0/iteration_0/pullx.xvg`.
         """
         if hasattr(self, 'set_ref_dist'):
             self.ref_dist = []
-            pullx_file = 'sim_0/iteration_0/pullx.xvg'
             for i in range(len(self.set_ref_dist)):
                 if self.set_ref_dist[i] is True:
                     # dist = list(extract_dataframe(pullx_file, headers=headers)[f'{i+1}'])[0]
@@ -900,6 +906,7 @@ def get_swapping_pattern(self, dhdl_files, states):
                 # This should only happen when the method of exhaustive swaps is used.
                 if i == 0:
                     self.n_empty_swappable += 1
+                    print('No swap is proposed because there is no swappable pair at all.')
                 break
             else:
                 self.n_swap_attempts += 1
@@ -908,7 +915,7 @@ def get_swapping_pattern(self, dhdl_files, states):
 
                 swap = ReplicaExchangeEE.propose_swap(swappables)
                 print(f'\nProposed swap: {swap}')
-                if swap == []:
+                if swap == []:  # the same as len(swappables) == 0, self.proposal must not be exhaustive if this line is reached.
                     self.n_empty_swappable += 1
                     print('No swap is proposed because there is no swappable pair at all.')
                     break  # no need to re-identify swappable pairs and draw new samples
diff --git a/ensemble_md/tests/data/pullx.xvg b/ensemble_md/tests/data/pullx.xvg
new file mode 100644
index 00000000..526e0665
--- /dev/null
+++ b/ensemble_md/tests/data/pullx.xvg
@@ -0,0 +1,28 @@
+# This file was created Thu Feb 15 02:05:13 2024
+# Created by:
+#          :-) GROMACS - gmx mdrun, 2022.5-dev-20230428-fdf57150ad (-:
+# 
+# Executable:   /jet/home/wehs7661/pkgs/gromacs/2022.5/bin/gmx
+# Data prefix:  /jet/home/wehs7661/pkgs/gromacs/2022.5
+# Working dir:  /ocean/projects/bio230014p/wehs7661/EEXE_experiments/CB7-10/complex/REXEE/fixed/Group_1/test_1/rep_1/sim_0/iteration_0
+# Command line:
+#   gmx mdrun -s sys_EE.tpr -nt 16 -ntmpi 1
+# gmx mdrun is part of G R O M A C S:
+#
+# GROwing Monsters And Cloning Shrimps
+#
+@    title "Pull COM"
+@    xaxis  label "Time (ps)"
+@    yaxis  label "Position (nm)"
+@TYPE xy
+@ view 0.15, 0.15, 0.75, 0.85
+@ legend on
+@ legend box on
+@ legend loctype view
+@ legend 0.78, 0.8
+@ legend length 2
+@ s0 legend "1"
+@ s1 legend "1 ref"
+0.0000  0.428422        0.428422
+2.0000  0.457696        0.428422
+4.0000  0.374694        0.428422
diff --git a/ensemble_md/tests/test_replica_exchange_EE.py b/ensemble_md/tests/test_replica_exchange_EE.py
index 9aaa927d..7b6bac7f 100644
--- a/ensemble_md/tests/test_replica_exchange_EE.py
+++ b/ensemble_md/tests/test_replica_exchange_EE.py
@@ -396,9 +396,21 @@ def test_print_params(self, capfd, params_dict):
         L += "Note that the input MDP file has been reformatted by replacing hypens with underscores. The original mdp file has been renamed as *backup.mdp.\n"  # noqa: E501
         assert out_2 == L
 
+        REXEE.gro = ['ensemble_md/tests/data/sys.gro', 'ensemble_md/tests/data/sys.gro']  # noqa: E501
+        REXEE.top = ['ensemble_md/tests/data/sys.top', 'ensemble_md/tests/data/sys.top']
+        REXEE.mdp_args = {'ref_p': [1.0, 1.01, 1.02, 1.03], 'ref_t': [298, 300, 302, 303]}
+        REXEE.print_params()
+        out_3, err = capfd.readouterr()
+        line_1 = 'Simulation inputs: ensemble_md/tests/data/sys.gro, ensemble_md/tests/data/sys.gro, ensemble_md/tests/data/sys.top, ensemble_md/tests/data/sys.top, ensemble_md/tests/data/expanded.mdp\n'  # noqa: E501
+        line_2 = 'MDP parameters differing across replicas:\n  - ref_p: [1.0, 1.01, 1.02, 1.03]\n  - ref_t: [298, 300, 302, 303]'  # noqa: E501
+        assert line_1 in out_3
+        assert line_2 in out_3
+
     def test_initialize_MDP(self, params_dict):
+        params_dict['mdp_args'] = {'ref_p': [1.0, 1.01, 1.02, 1.03], 'ref_t': [298, 300, 302, 303]}
         REXEE = get_REXEE_instance(params_dict)
         MDP = REXEE.initialize_MDP(2)  # the third replica
+        assert MDP["ref_p"] == 1.02
         assert MDP["nsteps"] == 500
         assert all(
             [
@@ -420,6 +432,12 @@ def test_initialize_MDP(self, params_dict):
             [a == b for a, b in zip(MDP["init_lambda_weights"], [0, 0, 0, 0, 0, 0])]
         )
 
+    def test_get_ref_dist(self, params_dict):
+        params_dict['set_ref_dist'] = [True]  # request a reference distance for the first pull coordinate
+        REXEE = get_REXEE_instance(params_dict)
+        REXEE.get_ref_dist('ensemble_md/tests/data/pullx.xvg')
+        assert REXEE.ref_dist == [0.428422]  # the first distance recorded in pullx.xvg (t = 0)
+
     def test_update_MDP(self, params_dict):
         new_template = "ensemble_md/tests/data/expanded.mdp"
         iter_idx = 3
@@ -430,13 +448,22 @@ def test_update_MDP(self, params_dict):
             [0, 0, 0, 0, 0, 0],
             [3.48, 2.78, 3.21, 4.56, 8.79, 0.48],
             [8.45, 0.52, 3.69, 2.43, 4.56, 6.73], ]
+        counts = [
+            [4, 11, 9, 9, 11, 6],
+            [9, 8, 8, 11, 7, 7],
+            [3, 1, 1, 9, 15, 21],
+            [0, 0, 0, 1, 18, 31],
+        ]
+        params_dict['set_ref_dist'] = [True]
 
         REXEE = get_REXEE_instance(params_dict)
-        REXEE.equil = [-1, 1, 0, -1]  # i.e. the 3rd replica will use fixed weights in the next iteration
+        REXEE.equil = [-1, 1, 0, -1]  # i.e., the 3rd replica will use fixed weights in the next iteration
         MDP_1 = REXEE.update_MDP(
             new_template, 2, iter_idx, states, wl_delta, weights)  # third replica
+        
+        REXEE.get_ref_dist('ensemble_md/tests/data/pullx.xvg')  # so that we can test the pull code
         MDP_2 = REXEE.update_MDP(
-            new_template, 3, iter_idx, states, wl_delta, weights)  # fourth replica
+            new_template, 3, iter_idx, states, wl_delta, weights, counts)  # fourth replica
 
         assert MDP_1["tinit"] == MDP_2["tinit"] == 3
         assert MDP_1["nsteps"] == MDP_2["nsteps"] == 500
@@ -461,6 +488,9 @@ def test_update_MDP(self, params_dict):
                 )
             ]
         )
+        assert MDP_2['init_histogram_counts'] == [0, 0, 0, 1, 18, 31]
+        assert MDP_2['pull_coord1_start'] == 'no'
+        assert MDP_2['pull_coord1_init'] == 0.428422
 
     def test_extract_final_dhdl_info(self, params_dict):
         REXEE = get_REXEE_instance(params_dict)
@@ -488,11 +518,48 @@ def test_extract_final_log_info(self, params_dict):
             [0, 0, 0, 1, 18, 31], ]
         assert REXEE.equil == [-1, -1, -1, -1]
 
+        # Below is a case where one of the replicas (the first replica) got equilibrated
+        log_files[0] = os.path.join(input_path, "log/case2_1.log")  # equilibrated weights
+        wl_delta, weights, counts = REXEE.extract_final_log_info(log_files)
+        assert np.allclose(REXEE.equil, [6.06, -1, -1, -1])
+        assert REXEE.equilibrated_weights == [[0.00000, 1.40453, 2.85258, 2.72480, 3.46220, 5.88607], [], [], []]
+
     def test_get_averaged_weights(self, params_dict):
         REXEE = get_REXEE_instance(params_dict)
         log_files = [
             os.path.join(input_path, f"log/EXE_{i}.log") for i in range(REXEE.n_sim)]
         avg, err = REXEE.get_averaged_weights(log_files)
+        assert REXEE.current_wl_delta == [0.4, 0.5, 0.5, 0.5]
+        assert REXEE.updating_weights == [
+            [
+                [0, 3.83101, 4.95736, 5.63808, 6.07220, 6.13408],
+                [0, 3.43101, 3.75736, 5.23808, 4.87220, 5.33408],
+                [0, 2.63101, 2.95736, 5.23808, 4.47220, 5.73408],
+                [0, 1.83101, 2.55736, 4.43808, 4.47220, 6.13408],
+                [0, 1.03101, 2.55736, 3.63808, 4.47220, 6.13408],
+            ],  # the weights of the first replica at 5 different time frames
+            [
+                [0, 0.72635, 0.80707, 1.44120, 2.10308, 4.03106],
+                [0, 0.72635, 1.30707, 1.44120, 2.10308, 4.53106],
+                [0, 0.72635, 2.80707, 2.94120, 4.10308, 6.53106],
+                [0, 1.72635, 2.30707, 2.44120, 5.10308, 6.53106],
+                [0, 1.22635, 2.30707, 2.44120, 4.10308, 6.03106],
+            ],  # the weights of the second replica at 5 different time frames
+            [
+                [0, -0.33569, -0.24525, 2.74443, 4.59472, 7.70726],
+                [0, -0.33569, -0.24525, 2.74443, 3.59472, 3.70726],
+                [0, -0.33569, -0.24525, 2.74443, 2.09472, 0.20726],
+                [0, -0.33569, -0.24525, 1.74443, -0.90528, -0.79274],
+                [0, 0.66431, 1.25475, 0.24443, 0.59472, 0.70726]
+            ],  # the weights of the third replica at 5 different time frames
+            [
+                [0, 0.09620, 1.59937, -4.31679, -14.89436, -16.08701],
+                [0, 0.09620, 1.59937, -4.31679, -15.89436, -20.08701],
+                [0, 0.09620, 1.59937, -4.31679, -18.39436, -22.58701],
+                [0, 0.09620, 1.59937, -4.31679, -20.39436, -25.58701],
+                [0, 0.09620, 1.59937, -4.31679, -22.89436, -28.08701]
+            ]
+        ]
         assert np.allclose(avg[0],  [0, 2.55101, 3.35736, 4.83808, 4.8722, 5.89408])
         assert np.allclose(err[0], [0, 1.14542569, 1.0198039, 0.8, 0.69282032, 0.35777088])
 
@@ -584,6 +651,19 @@ def test_get_swapping_pattern(self, params_dict):
         assert pattern_4_2 == [1, 0, 3, 2]
         assert swap_list_4_2 == [(2, 3), (0, 1)]
 
+        # Case 4-3: REXEE.proposal is set to exhaustive but there is only one swappable pair anyway.
+        random.seed(0)
+        REXEE = get_REXEE_instance(params_dict)
+        REXEE.proposal = 'exhaustive'
+        states = [0, 2, 2, 8]  # swappable pair: [(1, 2)], swap: (1, 2), accept
+        f = copy.deepcopy(dhdl_files)
+        pattern_4_3, swap_list_4_3 = REXEE.get_swapping_pattern(f, states)
+        assert REXEE.n_swap_attempts == 1
+        assert REXEE.n_rejected == 0
+        assert pattern_4_3 == [0, 2, 1, 3]
+        assert swap_list_4_3 == [(1, 2)]
+
+
     def test_calc_prob_acc(self, capfd, params_dict):
         # k = 1.380649e-23; NA = 6.0221408e23; T = 298; kT = k * NA * T / 1000 = 2.4777098766670016
         REXEE = get_REXEE_instance(params_dict)

From fbcab8dc256cfb224ff722b60654c386bd4d5594 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Mon, 25 Mar 2024 23:53:27 +0800
Subject: [PATCH 04/41] Added a test using mpi4py

---
 .circleci/config.yml               | 1 +
 ensemble_md/tests/test_mpi_func.py | 7 +++++++
 2 files changed, 8 insertions(+)
 create mode 100644 ensemble_md/tests/test_mpi_func.py

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 6d3b09e6..2f31866a 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -56,6 +56,7 @@ jobs:
             pip3 install pytest 
             pip3 install pytest-cov
             pytest -vv --disable-pytest-warnings --cov=ensemble_md --cov-report=xml --color=yes ensemble_md/tests/
+            mpirun -np 4 pytest -vv --disable-pytest-warnings --cov=ensemble_md --cov-report=xml --color=yes ensemble_md/tests/test_mpi.py --with-mpi
 
       - run:
           name: CodeCov
diff --git a/ensemble_md/tests/test_mpi_func.py b/ensemble_md/tests/test_mpi_func.py
new file mode 100644
index 00000000..997dd452
--- /dev/null
+++ b/ensemble_md/tests/test_mpi_func.py
@@ -0,0 +1,7 @@
+import pytest
+from mpi4py import MPI
+
+@pytest.mark.mpi
+def test_size():
+    comm = MPI.COMM_WORLD
+    assert comm.size == 4

From 4a591cbaca91b902fab140c035e119263663aacb Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 26 Mar 2024 01:25:32 +0800
Subject: [PATCH 05/41] Added the YAML parameter working_dir

---
 .circleci/config.yml               |  3 ++-
 docs/simulations.rst               |  6 ++++--
 ensemble_md/replica_exchange_EE.py | 15 +++++++++------
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 2f31866a..13b0fb97 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -53,7 +53,8 @@ jobs:
           name: Run unit tests
           command: |
             source $HOME/pkgs/bin/GMXRC
-            pip3 install pytest 
+            pip3 install pytest
+            pip3 install pytest-mpi
             pip3 install pytest-cov
             pytest -vv --disable-pytest-warnings --cov=ensemble_md --cov-report=xml --color=yes ensemble_md/tests/
             mpirun -np 4 pytest -vv --disable-pytest-warnings --cov=ensemble_md --cov-report=xml --color=yes ensemble_md/tests/test_mpi.py --with-mpi
diff --git a/docs/simulations.rst b/docs/simulations.rst
index 9d344a04..bffd7b40 100644
--- a/docs/simulations.rst
+++ b/docs/simulations.rst
@@ -222,8 +222,8 @@ include parameters for data analysis here.
       :code:`/usr/local/gromacs/bin/gmx`, the path returned by the command :code:`which gmx`) should be used.
       Note that REXEE only works with MPI-enabled GROMACS. 
 
-3.2. Input files
-----------------
+3.2. Input settings
+-------------------
 
   - :code:`gro`: (Required)
       The input system configuration in the form of GRO file(s) used to initiate the REXEE simulation. If only one GRO file is specified,
@@ -242,6 +242,8 @@ include parameters for data analysis here.
       exchanges only occur in the end states, then one could have :math:`λ` values like :code:`0.0 0.3 0.7 1.0 0.0 0.3 ...`. Notably, unlike
       the parameters :code:`gro` and :code:`top`, only one MDP file can be specified for the parameter :code:`mdp`. If you wish to use
       different parameters for different replicas, please use the parameter :code:`mdp_args`.
+  - :code:`working_dir`: (Optional, Default: :code:`os.getcwd()`)
+      The working directory where the REXEE simulation will be performed. If not specified, the current working directory will be used.
   - :code:`modify_coords`: (Optional, Default: :code:`None`)
       The name of the Python module (without including the :code:`.py` extension) for modifying the output coordinates of the swapping replicas
       before the coordinate exchange, which is generally required in REXEE simulations for multiple serial mutations.
diff --git a/ensemble_md/replica_exchange_EE.py b/ensemble_md/replica_exchange_EE.py
index 3612500b..c5cfe0d0 100644
--- a/ensemble_md/replica_exchange_EE.py
+++ b/ensemble_md/replica_exchange_EE.py
@@ -152,6 +152,7 @@ def set_params(self, analysis):
         # Step 3: Handle the optional YAML parameters
         # Key: Optional argument; Value: Default value
         optional_args = {
+            "working_dir": os.getcwd(),
             "add_swappables": None,
             "modify_coords": None,
             "nst_sim": None,
@@ -592,7 +593,7 @@ def initialize_MDP(self, idx):
 
         return MDP
 
-    def get_ref_dist(self, pullx_file = 'sim_0/iteration_0/pullx.xvg'):
+    def get_ref_dist(self, pullx_file=None):
         """
         Gets the reference distance(s) to use starting from the second iteration if distance restraint(s) are used.
         Specifically, a reference distance determined here is the initial COM distance between the pull groups
@@ -605,6 +606,8 @@ def get_ref_dist(self, pullx_file = 'sim_0/iteration_0/pullx.xvg'):
             Usually, this should be the path of the pullx file of the first iteration. The default
             is :code:`sim_0/iteration_0/pullx.xvg`.
         """
+        if pullx_file is None:
+            pullx_file = f"{self.working_dir}/sim_0/iteration_0/pullx.xvg"
         if hasattr(self, 'set_ref_dist'):
             self.ref_dist = []
             for i in range(len(self.set_ref_dist)):
@@ -1320,14 +1323,14 @@ def _run_grompp(self, n, swap_pattern):
             arguments = [self.gmx_executable, 'grompp']
 
             # Input files
-            mdp = f"sim_{i}/iteration_{n}/{self.mdp.split('/')[-1]}"
+            mdp = f"{self.working_dir}/sim_{i}/iteration_{n}/{self.mdp.split('/')[-1]}"
             if n == 0:
                 if isinstance(self.gro, list):
                     gro = f"{self.gro[i]}"
                 else:
                     gro = f"{self.gro}"
             else:
-                gro = f"sim_{swap_pattern[i]}/iteration_{n-1}/confout.gro"  # This effectively swap out GRO files
+                gro = f"{self.working_dir}/sim_{swap_pattern[i]}/iteration_{n-1}/confout.gro"  # This effectively swap out GRO files
 
             if isinstance(self.top, list):
                 top = f"{self.top[i]}"
@@ -1339,8 +1342,8 @@ def _run_grompp(self, n, swap_pattern):
 
             # Add output file arguments
             arguments.extend([
-                "-o", f"sim_{i}/iteration_{n}/sys_EE.tpr",
-                "-po", f"sim_{i}/iteration_{n}/mdout.mdp"
+                "-o", f"{self.working_dir}/sim_{i}/iteration_{n}/sys_EE.tpr",
+                "-po", f"{self.working_dir}/sim_{i}/iteration_{n}/mdout.mdp"
             ])
 
             # Add additional arguments if any
@@ -1394,7 +1397,7 @@ def _run_mdrun(self, n):
         if rank == 0:
             print('Running EXE simulations ...')
         if rank < self.n_sim:
-            os.chdir(f'sim_{rank}/iteration_{n}')
+            os.chdir(f'{self.working_dir}/sim_{rank}/iteration_{n}')
             returncode, stdout, stderr = utils.run_gmx_cmd(arguments)
             if returncode != 0:
                 print(f'Error on rank {rank} (return code: {returncode}):\n{stderr}')

From 81c146ae50d7a4f813789e3704e5fe8beff4592e Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 26 Mar 2024 01:41:18 +0800
Subject: [PATCH 06/41] Removed the YAML parameter working_dir and added lines
 to parse working_dir from self.mdp

---
 docs/simulations.rst               | 8 +++-----
 ensemble_md/replica_exchange_EE.py | 2 +-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/docs/simulations.rst b/docs/simulations.rst
index bffd7b40..f5979c24 100644
--- a/docs/simulations.rst
+++ b/docs/simulations.rst
@@ -226,24 +226,22 @@ include parameters for data analysis here.
 -------------------
 
   - :code:`gro`: (Required)
-      The input system configuration in the form of GRO file(s) used to initiate the REXEE simulation. If only one GRO file is specified,
+      The path of the input system configuration in the form of GRO file(s) used to initiate the REXEE simulation. If only one GRO file is specified,
       it will be used to initiate all the replicas. If multiple GRO files are specified (using the YAML syntax),
       the number of GRO files has to be the same as the number of replicas. 
   - :code:`top`: (Required)
-      The input system topology in the form of TOP file(s) used to initiate the REXEE simulation. If only one TOP file is specified,
+      The path of the input system topology in the form of TOP file(s) used to initiate the REXEE simulation. If only one TOP file is specified,
       it will be used to initiate all the replicas. If multiple TOP files are specified (using the YAML syntax),
       the number of TOP files has to be the same as the number of replicas. In the case where multiple TOP and GRO files are specified,
       the i-th TOP file corresponds to the i-th GRO file.
   - :code:`mdp`: (Required)
-      The input MDP file used to initiate the REXEE simulation. Specifically, this input MDP file will serve as a template for
+      The path of the input MDP file used to initiate the REXEE simulation. Specifically, this input MDP file will serve as a template for
       customizing MDP files for all replicas. Therefore, the MDP template must have the whole range of :math:`λ` values. 
       and the corresponding weights (in fixed-weight simulations). This holds for REXEE simulations for multiple serial mutations as well.
       For example, in an REXEE simulation that mutates methane to ethane in one replica and ethane to propane in the other replica, if
       exchanges only occur in the end states, then one could have :math:`λ` values like :code:`0.0 0.3 0.7 1.0 0.0 0.3 ...`. Notably, unlike
       the parameters :code:`gro` and :code:`top`, only one MDP file can be specified for the parameter :code:`mdp`. If you wish to use
       different parameters for different replicas, please use the parameter :code:`mdp_args`.
-  - :code:`working_dir`: (Optional, Default: :code:`os.getcwd()`)
-      The working directory where the REXEE simulation will be performed. If not specified, the current working directory will be used.
   - :code:`modify_coords`: (Optional, Default: :code:`None`)
       The name of the Python module (without including the :code:`.py` extension) for modifying the output coordinates of the swapping replicas
       before the coordinate exchange, which is generally required in REXEE simulations for multiple serial mutations.
diff --git a/ensemble_md/replica_exchange_EE.py b/ensemble_md/replica_exchange_EE.py
index c5cfe0d0..84f8f61d 100644
--- a/ensemble_md/replica_exchange_EE.py
+++ b/ensemble_md/replica_exchange_EE.py
@@ -152,7 +152,6 @@ def set_params(self, analysis):
         # Step 3: Handle the optional YAML parameters
         # Key: Optional argument; Value: Default value
         optional_args = {
-            "working_dir": os.getcwd(),
             "add_swappables": None,
             "modify_coords": None,
             "nst_sim": None,
@@ -242,6 +241,7 @@ def set_params(self, analysis):
         for i in params_str:
             if type(getattr(self, i)) != str:
                 raise ParameterError(f"The parameter '{i}' should be a string.")
+        self.working_dir = os.path.dirname(self.mdp)
 
         params_bool = ['verbose', 'rm_cpt', 'msm', 'free_energy', 'subsampling_avg', 'w_combine']
         for i in params_bool:

From 060c608107df603a40db8e735f4aab0eb4ba2748 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 26 Mar 2024 01:50:25 +0800
Subject: [PATCH 07/41] Used working_dir for the paths in run_REXEE.py and
 defaulted working_dir to '.'

---
 ensemble_md/cli/run_REXEE.py       | 26 +++++++++++++-------------
 ensemble_md/replica_exchange_EE.py |  2 ++
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/ensemble_md/cli/run_REXEE.py b/ensemble_md/cli/run_REXEE.py
index dc7b18aa..6d63bc2c 100644
--- a/ensemble_md/cli/run_REXEE.py
+++ b/ensemble_md/cli/run_REXEE.py
@@ -94,10 +94,10 @@ def main():
         # 2-1. Set up input files for all simulations
         if rank == 0:
             for i in range(REXEE.n_sim):
-                os.mkdir(f'sim_{i}')
-                os.mkdir(f'sim_{i}/iteration_0')
+                os.mkdir(f'{REXEE.working_dir}/sim_{i}')
+                os.mkdir(f'{REXEE.working_dir}/sim_{i}/iteration_0')
                 MDP = REXEE.initialize_MDP(i)
-                MDP.write(f"sim_{i}/iteration_0/expanded.mdp", skipempty=True)
+                MDP.write(f"{REXEE.working_dir}/sim_{i}/iteration_0/expanded.mdp", skipempty=True)
 
         # 2-2. Run the first set of simulations
         REXEE.run_REXEE(0)
@@ -115,10 +115,10 @@ def main():
             else:
                 print('Deleting data generated after the checkpoint ...')
                 for i in range(REXEE.n_sim):
-                    n_finished = len(next(os.walk(f'sim_{i}'))[1])  # number of finished iterations
+                    n_finished = len(next(os.walk(f'{REXEE.working_dir}/sim_{i}'))[1])  # number of finished iterations
                     for j in range(start_idx, n_finished):
-                        print(f'  Deleting the folder sim_{i}/iteration_{j}')
-                        shutil.rmtree(f'sim_{i}/iteration_{j}')
+                        print(f'  Deleting the folder {REXEE.working_dir}/sim_{i}/iteration_{j}')
+                        shutil.rmtree(f'{REXEE.working_dir}/sim_{i}/iteration_{j}')
 
             # Read g_vecs.npy and rep_trajs.npy so that new data can be appended, if any.
             # Note that these two arrays are created in rank 0 and should always be operated in rank 0,
@@ -132,7 +132,7 @@ def main():
         start_idx = comm.bcast(start_idx, root=0)  # so that all the ranks are aware of start_idx
 
     # 2-3. Get the reference distance for the distance restraint specified in the pull code, if any.
-    pullx_file = 'sim_0/iteration_0/pullx.xvg'
+    pullx_file = f'{REXEE.working_dir}/sim_0/iteration_0/pullx.xvg'
     REXEE.get_ref_dist(pullx_file)
 
     for i in range(start_idx, REXEE.n_iter):
@@ -144,8 +144,8 @@ def main():
                 # 3-1. For all the replica simulations,
                 #   (1) Find the last sampled state and the corresponding lambda values from the DHDL files.
                 #   (2) Find the final Wang-Landau incrementors and weights from the LOG files.
-                dhdl_files = [f'sim_{j}/iteration_{i - 1}/dhdl.xvg' for j in range(REXEE.n_sim)]
-                log_files = [f'sim_{j}/iteration_{i - 1}/md.log' for j in range(REXEE.n_sim)]
+                dhdl_files = [f'{REXEE.working_dir}/sim_{j}/iteration_{i - 1}/dhdl.xvg' for j in range(REXEE.n_sim)]
+                log_files = [f'{REXEE.working_dir}/sim_{j}/iteration_{i - 1}/md.log' for j in range(REXEE.n_sim)]
                 states_ = REXEE.extract_final_dhdl_info(dhdl_files)
                 wl_delta, weights_, counts_ = REXEE.extract_final_log_info(log_files)
                 print()
@@ -246,9 +246,9 @@ def main():
                 # Here we keep the lambda range set in mdp the same across different iterations in the same folder but swap out the gro file  # noqa: E501
                 # Note we use states (copy of states_) instead of states_ in update_MDP.
                 for j in list(range(REXEE.n_sim)):
-                    os.mkdir(f'sim_{j}/iteration_{i}')
+                    os.mkdir(f'{REXEE.working_dir}/sim_{j}/iteration_{i}')
                     MDP = REXEE.update_MDP(f"sim_{j}/iteration_{i - 1}/expanded.mdp", j, i, states, wl_delta, weights, counts)   # modify with a new template  # noqa: E501
-                    MDP.write(f"sim_{j}/iteration_{i}/expanded.mdp", skipempty=True)
+                    MDP.write(f"{REXEE.working_dir}/sim_{j}/iteration_{i}/expanded.mdp", skipempty=True)
                     # In run_REXEE(i, swap_pattern), where the tpr files will be generated, we use the top file at the
                     # level of the simulation (the file that will be shared by all simulations). For the gro file, we
                     # pass swap_pattern to the function to figure it out internally.
@@ -292,8 +292,8 @@ def main():
                         for j in range(len(swap_list)):
                             print('\nModifying the coordinates of the following output GRO files ...')
                             # gro_1 and gro_2 are the simlation outputs (that we want to back up) and the inputs to modify_coords  # noqa: E501
-                            gro_1 = f'sim_{swap_list[j][0]}/iteration_{i-1}/confout.gro'
-                            gro_2 = f'sim_{swap_list[j][1]}/iteration_{i-1}/confout.gro'
+                            gro_1 = f'{REXEE.working_dir}/sim_{swap_list[j][0]}/iteration_{i-1}/confout.gro'
+                            gro_2 = f'{REXEE.working_dir}/sim_{swap_list[j][1]}/iteration_{i-1}/confout.gro'
                             print(f'  - {gro_1}\n  - {gro_2}')
 
                             # Now we rename gro_1 and gro_2 to back them up
diff --git a/ensemble_md/replica_exchange_EE.py b/ensemble_md/replica_exchange_EE.py
index 84f8f61d..e4ecaff1 100644
--- a/ensemble_md/replica_exchange_EE.py
+++ b/ensemble_md/replica_exchange_EE.py
@@ -242,6 +242,8 @@ def set_params(self, analysis):
             if type(getattr(self, i)) != str:
                 raise ParameterError(f"The parameter '{i}' should be a string.")
         self.working_dir = os.path.dirname(self.mdp)
+        if self.working_dir == '':
+            self.working_dir = '.'
 
         params_bool = ['verbose', 'rm_cpt', 'msm', 'free_energy', 'subsampling_avg', 'w_combine']
         for i in params_bool:

From 2a76ea54ebbc38c2e9568736d51901b6c2801c46 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 26 Mar 2024 02:01:33 +0800
Subject: [PATCH 08/41] Added a unit test for _run_grompp

---
 ensemble_md/tests/test_mpi_func.py | 82 ++++++++++++++++++++++++++++--
 1 file changed, 79 insertions(+), 3 deletions(-)

diff --git a/ensemble_md/tests/test_mpi_func.py b/ensemble_md/tests/test_mpi_func.py
index 997dd452..6bcc0f4d 100644
--- a/ensemble_md/tests/test_mpi_func.py
+++ b/ensemble_md/tests/test_mpi_func.py
@@ -1,7 +1,83 @@
+####################################################################
+#                                                                  #
+#    ensemble_md,                                                  #
+#    a python package for running GROMACS simulation ensembles     #
+#                                                                  #
+#    Written by Wei-Tse Hsu <wehs7661@colorado.edu>                #
+#    Copyright (c) 2022 University of Colorado Boulder             #
+#                                                                  #
+####################################################################
+"""
+Unit tests for the functions that use MPI, including `_run_grompp`, `_run_mdrun` and `run_REXEE`.
+"""
+import os
+import yaml
+import shutil
 import pytest
 from mpi4py import MPI
+from ensemble_md.replica_exchange_EE import ReplicaExchangeEE
+
+current_path = os.path.dirname(os.path.abspath(__file__))
+input_path = os.path.join(current_path, "data")
+
+comm = MPI.COMM_WORLD
+rank = comm.Get_rank()
+
+@pytest.fixture
+def params_dict():
+    """
+    Generates a dictionary containing the required REXEE parameters.
+    """
+    REXEE_dict = {
+        'gmx_executable': 'gmx',
+        'gro': 'ensemble_md/tests/data/sys.gro',
+        'top': 'ensemble_md/tests/data/sys.top',
+        'mdp': 'ensemble_md/tests/data/expanded.mdp',
+        'n_sim': 4,
+        'n_iter': 10,
+        's': 1,
+        'working_dir': 'ensemble_md/tests/data',
+    }
+    yield REXEE_dict
+
+    # Remove the file after the unit test is done.
+    if os.path.isfile('params.yaml') is True:
+        os.remove('params.yaml')
+
+    # Remove the file after the unit test is done.
+    if os.path.isfile('params.yaml') is True:
+        os.remove('params.yaml')
+
+
+def get_REXEE_instance(input_dict, yml_file='params.yaml'):
+    """
+    Saves a dictionary as a yaml file and use it to instantiate the ReplicaExchangeEE class.
+    """
+    with open(yml_file, 'w') as f:
+        yaml.dump(input_dict, f)
+    REXEE = ReplicaExchangeEE(yml_file)
+    return REXEE
+
 
 @pytest.mark.mpi
-def test_size():
-    comm = MPI.COMM_WORLD
-    assert comm.size == 4
+def test_run_grompp(params_dict):
+    params_dict['grompp_args'] = {'-maxwarn': '1'}
+
+    # Case 1: The first iteration, i.e., n = 0
+    n = 0
+    swap_pattern = [1, 0, 2, 3]
+    REXEE = get_REXEE_instance(params_dict)
+
+    if rank == 0:
+        for i in range(params_dict['n_sim']):
+            os.makedirs(f'{REXEE.working_dir}/sim_{i}/iteration_{n}')
+            shutil.copy(REXEE.mdp, f'{REXEE.working_dir}/sim_{i}/iteration_{n}/expanded.mdp')
+
+    REXEE._run_grompp(n, swap_pattern)
+
+    # Check if the output files are generated, then clean up
+    if rank == 0:
+        for i in range(params_dict['n_sim']):
+            assert os.path.isfile(f'{REXEE.working_dir}/sim_{i}/iteration_0/sys_EE.tpr') is True
+            assert os.path.isfile(f'{REXEE.working_dir}/sim_{i}/iteration_0/mdout.mdp') is True
+            shutil.rmtree(f'{REXEE.working_dir}/sim_{i}')

From ab675f45732ff2084795aecf123aa146888c8098 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 26 Mar 2024 02:37:47 +0800
Subject: [PATCH 09/41] Fixed linting issues, ignored sim_* folders, and
 checked the grompp command in test_run_grompp

---
 .gitignore                                    |  1 +
 ensemble_md/replica_exchange_EE.py            |  6 +--
 ensemble_md/tests/test_mpi_func.py            | 48 +++++++++++++++++++
 ensemble_md/tests/test_replica_exchange_EE.py |  3 +-
 4 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index d637be63..f2b3757f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -46,6 +46,7 @@ nosetests.xml
 coverage.xml
 *.cover
 .hypothesis/
+sim_*
 
 # Translations
 *.mo
diff --git a/ensemble_md/replica_exchange_EE.py b/ensemble_md/replica_exchange_EE.py
index e4ecaff1..d714d2d3 100644
--- a/ensemble_md/replica_exchange_EE.py
+++ b/ensemble_md/replica_exchange_EE.py
@@ -600,7 +600,7 @@ def get_ref_dist(self, pullx_file=None):
         Gets the reference distance(s) to use starting from the second iteration if distance restraint(s) are used.
         Specifically, a reference distance determined here is the initial COM distance between the pull groups
         in the input GRO file. This function initializes the attribute :code:`ref_dist`.
-        
+
         Parameter
         ---------
         pullx_file : str
@@ -920,7 +920,7 @@ def get_swapping_pattern(self, dhdl_files, states):
 
                 swap = ReplicaExchangeEE.propose_swap(swappables)
                 print(f'\nProposed swap: {swap}')
-                if swap == []:  # the same as len(swappables) == 0, self.proposal must not be exhaustive if this line is reached.
+                if swap == []:  # the same as len(swappables) == 0, self.proposal must not be exhaustive if this line is reached.  # noqa: E501
                     self.n_empty_swappable += 1
                     print('No swap is proposed because there is no swappable pair at all.')
                     break  # no need to re-identify swappable pairs and draw new samples
@@ -1332,7 +1332,7 @@ def _run_grompp(self, n, swap_pattern):
                 else:
                     gro = f"{self.gro}"
             else:
-                gro = f"{self.working_dir}/sim_{swap_pattern[i]}/iteration_{n-1}/confout.gro"  # This effectively swap out GRO files
+                gro = f"{self.working_dir}/sim_{swap_pattern[i]}/iteration_{n-1}/confout.gro"  # This effectively swap out GRO files  # noqa: E501
 
             if isinstance(self.top, list):
                 top = f"{self.top[i]}"
diff --git a/ensemble_md/tests/test_mpi_func.py b/ensemble_md/tests/test_mpi_func.py
index 6bcc0f4d..352fbc74 100644
--- a/ensemble_md/tests/test_mpi_func.py
+++ b/ensemble_md/tests/test_mpi_func.py
@@ -23,6 +23,7 @@
 comm = MPI.COMM_WORLD
 rank = comm.Get_rank()
 
+
 @pytest.fixture
 def params_dict():
     """
@@ -59,6 +60,40 @@ def get_REXEE_instance(input_dict, yml_file='params.yaml'):
     return REXEE
 
 
+def get_gmx_cmd_from_output(output):
+    """
+    Given a GROMACS output file like a LOG file or `mdout.mdp`, extract the GROMACS command that was run.
+
+    Parameters
+    ----------
+    output : str
+        The path to the GROMACS output file.
+
+    Returns
+    -------
+    cmd : str
+        The GROMACS command that was run.
+    flags : dict
+        The flags and values that were used in the GROMACS command.
+    """
+    f = open(output, 'r')
+    lines = f.readlines()
+    f.close()
+    n = -1
+    for l in lines:  # noqa: E741
+        n += 1
+        if l.startswith('Command line'):
+            cmd = lines[n+1].strip()
+
+    flags = {}
+    cmd_split = cmd.split(' ')
+    for i in range(len(cmd_split)):
+        if cmd_split[i].startswith('-'):
+            flags[cmd_split[i]] = cmd_split[i+1]
+
+    return cmd, flags
+
+
 @pytest.mark.mpi
 def test_run_grompp(params_dict):
     params_dict['grompp_args'] = {'-maxwarn': '1'}
@@ -80,4 +115,17 @@ def test_run_grompp(params_dict):
         for i in range(params_dict['n_sim']):
             assert os.path.isfile(f'{REXEE.working_dir}/sim_{i}/iteration_0/sys_EE.tpr') is True
             assert os.path.isfile(f'{REXEE.working_dir}/sim_{i}/iteration_0/mdout.mdp') is True
+
+            # Here we check if the command executed was what we expected
+            mdp = f'{REXEE.working_dir}/sim_{i}/iteration_0/mdout.mdp'
+            gro = params_dict['gro']
+            top = params_dict['top']
+            tpr = f'{REXEE.working_dir}/sim_{i}/iteration_0/sys_EE.tpr'
+            mdout = f'{REXEE.working_dir}/sim_{i}/iteration_0/mdout.mdp'
+            cmd = f'{REXEE.check_gmx_executable} -f {mdp} -c {gro} -p {top} -o {tpr} -po {mdout} -maxwarn 1'
+            assert get_gmx_cmd_from_output(mdout)[0] == cmd
+
             shutil.rmtree(f'{REXEE.working_dir}/sim_{i}')
+
+    # Case 2: Other iterations, i.e., n != 0
+    # More to come ...
diff --git a/ensemble_md/tests/test_replica_exchange_EE.py b/ensemble_md/tests/test_replica_exchange_EE.py
index 7b6bac7f..3a2880d8 100644
--- a/ensemble_md/tests/test_replica_exchange_EE.py
+++ b/ensemble_md/tests/test_replica_exchange_EE.py
@@ -460,7 +460,7 @@ def test_update_MDP(self, params_dict):
         REXEE.equil = [-1, 1, 0, -1]  # i.e., the 3rd replica will use fixed weights in the next iteration
         MDP_1 = REXEE.update_MDP(
             new_template, 2, iter_idx, states, wl_delta, weights)  # third replica
-        
+
         REXEE.get_ref_dist('ensemble_md/tests/data/pullx.xvg')  # so that we can test the pull code
         MDP_2 = REXEE.update_MDP(
             new_template, 3, iter_idx, states, wl_delta, weights, counts)  # fourth replica
@@ -663,7 +663,6 @@ def test_get_swapping_pattern(self, params_dict):
         assert pattern_4_3 == [0, 2, 1, 3]
         assert swap_list_4_3 == [(1, 2)]
 
-
     def test_calc_prob_acc(self, capfd, params_dict):
         # k = 1.380649e-23; NA = 6.0221408e23; T = 298; kT = k * NA * T / 1000 = 2.4777098766670016
         REXEE = get_REXEE_instance(params_dict)

From 8592b3ea127984ae25d10186fff72906ed933ab9 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 26 Mar 2024 02:46:26 +0800
Subject: [PATCH 10/41] Fixed the parsing of the GROMACS command line in
 get_gmx_cmd_from_output

---
 ensemble_md/tests/test_mpi_func.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/ensemble_md/tests/test_mpi_func.py b/ensemble_md/tests/test_mpi_func.py
index 352fbc74..13f9d611 100644
--- a/ensemble_md/tests/test_mpi_func.py
+++ b/ensemble_md/tests/test_mpi_func.py
@@ -79,11 +79,19 @@ def get_gmx_cmd_from_output(output):
     f = open(output, 'r')
     lines = f.readlines()
     f.close()
+
     n = -1
+    cmd = None
     for l in lines:  # noqa: E741
         n += 1
-        if l.startswith('Command line'):
-            cmd = lines[n+1].strip()
+        if 'Command line' in l:
+            if lines[n + 1].startswith(';'):
+                cmd = lines[n+1].split(';')[1].strip()
+            else:
+                cmd = lines[n+1].strip()
+            break
+    if cmd is None:
+        raise ValueError(f'Could not find the GROMACS command in the file {output}.')
 
     flags = {}
     cmd_split = cmd.split(' ')

From 80c6d571b24847f1b21123be2cb06c5075b00ece Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 26 Mar 2024 02:50:13 +0800
Subject: [PATCH 11/41] Changed the image in config.yml for CircleCI

---
 .circleci/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 13b0fb97..943c7733 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -4,7 +4,7 @@ jobs:
   test:
     machine:
       # Note that ubuntu with versions later than 2022 triggers an interative prompt that gets CI stuck
-      image: ubuntu-2004:202107-02
+      image: ubuntu-2204:edge
     environment:
       PYTHON_VERSION: "3.8"
     steps:

From d57ba9999b4c22f539eb686ee1e0d1ab7a43858a Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 26 Mar 2024 03:39:31 +0800
Subject: [PATCH 12/41] Reverted the CircleCI image and fixed the path of
 the MPI test file

---
 .circleci/config.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 943c7733..d50607e5 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -4,7 +4,7 @@ jobs:
   test:
     machine:
       # Note that ubuntu with versions later than 2022 triggers an interative prompt that gets CI stuck
-      image: ubuntu-2204:edge
+      image: ubuntu-2004:202107-02
     environment:
       PYTHON_VERSION: "3.8"
     steps:
@@ -57,7 +57,7 @@ jobs:
             pip3 install pytest-mpi
             pip3 install pytest-cov
             pytest -vv --disable-pytest-warnings --cov=ensemble_md --cov-report=xml --color=yes ensemble_md/tests/
-            mpirun -np 4 pytest -vv --disable-pytest-warnings --cov=ensemble_md --cov-report=xml --color=yes ensemble_md/tests/test_mpi.py --with-mpi
+            mpirun -np 4 pytest -vv --disable-pytest-warnings --cov=ensemble_md --cov-report=xml --color=yes ensemble_md/tests/test_mpi_func.py --with-mpi
 
       - run:
           name: CodeCov

From b1876dcc1756bbaf521de0094f0b61ebebf81bfd Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 26 Mar 2024 12:24:33 +0800
Subject: [PATCH 13/41] Fixed the expected grompp command in test_run_grompp

---
 ensemble_md/tests/test_mpi_func.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/ensemble_md/tests/test_mpi_func.py b/ensemble_md/tests/test_mpi_func.py
index 13f9d611..290fa2ac 100644
--- a/ensemble_md/tests/test_mpi_func.py
+++ b/ensemble_md/tests/test_mpi_func.py
@@ -17,9 +17,6 @@
 from mpi4py import MPI
 from ensemble_md.replica_exchange_EE import ReplicaExchangeEE
 
-current_path = os.path.dirname(os.path.abspath(__file__))
-input_path = os.path.join(current_path, "data")
-
 comm = MPI.COMM_WORLD
 rank = comm.Get_rank()
 
@@ -45,10 +42,6 @@ def params_dict():
     if os.path.isfile('params.yaml') is True:
         os.remove('params.yaml')
 
-    # Remove the file after the unit test is done.
-    if os.path.isfile('params.yaml') is True:
-        os.remove('params.yaml')
-
 
 def get_REXEE_instance(input_dict, yml_file='params.yaml'):
     """
@@ -125,12 +118,13 @@ def test_run_grompp(params_dict):
             assert os.path.isfile(f'{REXEE.working_dir}/sim_{i}/iteration_0/mdout.mdp') is True
 
             # Here we check if the command executed was what we expected
-            mdp = f'{REXEE.working_dir}/sim_{i}/iteration_0/mdout.mdp'
+            mdp = f'{REXEE.working_dir}/sim_{i}/iteration_0/expanded.mdp'
             gro = params_dict['gro']
             top = params_dict['top']
             tpr = f'{REXEE.working_dir}/sim_{i}/iteration_0/sys_EE.tpr'
             mdout = f'{REXEE.working_dir}/sim_{i}/iteration_0/mdout.mdp'
-            cmd = f'{REXEE.check_gmx_executable} -f {mdp} -c {gro} -p {top} -o {tpr} -po {mdout} -maxwarn 1'
+            cmd = f'{REXEE.gmx_executable} grompp -f {mdp} -c {gro} -p {top} -o {tpr} -po {mdout} -maxwarn 1'
+            print(cmd)
             assert get_gmx_cmd_from_output(mdout)[0] == cmd
 
             shutil.rmtree(f'{REXEE.working_dir}/sim_{i}')

From 946d0c23e64b00c78c8d5f549ef6ff6688b69fac Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 26 Mar 2024 15:32:36 +0800
Subject: [PATCH 14/41] Modified test_run_grompp and .circleci/config.yml

---
 .circleci/config.yml               |  5 +++--
 .gitignore                         |  1 +
 ensemble_md/tests/test_mpi_func.py | 29 ++++++++++++++++++++++++++++-
 3 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index d50607e5..249067ac 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -56,8 +56,9 @@ jobs:
             pip3 install pytest
             pip3 install pytest-mpi
             pip3 install pytest-cov
-            pytest -vv --disable-pytest-warnings --cov=ensemble_md --cov-report=xml --color=yes ensemble_md/tests/
-            mpirun -np 4 pytest -vv --disable-pytest-warnings --cov=ensemble_md --cov-report=xml --color=yes ensemble_md/tests/test_mpi_func.py --with-mpi
+            COVERAGE_FILE=.coverage_1 pytest -vv --disable-pytest-warnings --cov=ensemble_md --cov-report=xml --color=yes ensemble_md/tests/
+            COVERAGE_FILE=.coverage_2 mpirun -np 4 pytest -vv --disable-pytest-warnings --cov=ensemble_md --cov-report=xml --color=yes ensemble_md/tests/test_mpi_func.py --with-mpi
+            coverage combine .coverage_*
 
       - run:
           name: CodeCov
diff --git a/.gitignore b/.gitignore
index f2b3757f..8072ae5d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,6 +39,7 @@ pip-delete-this-directory.txt
 htmlcov/
 .tox/
 .coverage
+.coverage_*
 .coverage.*
 .cache
 .pytest_cache
diff --git a/ensemble_md/tests/test_mpi_func.py b/ensemble_md/tests/test_mpi_func.py
index 290fa2ac..889dd22e 100644
--- a/ensemble_md/tests/test_mpi_func.py
+++ b/ensemble_md/tests/test_mpi_func.py
@@ -106,6 +106,7 @@ def test_run_grompp(params_dict):
 
     if rank == 0:
         for i in range(params_dict['n_sim']):
+            # Here we use the template mdp file since this is mainly for testing the function, not the GROMACS command.
             os.makedirs(f'{REXEE.working_dir}/sim_{i}/iteration_{n}')
             shutil.copy(REXEE.mdp, f'{REXEE.working_dir}/sim_{i}/iteration_{n}/expanded.mdp')
 
@@ -130,4 +131,30 @@ def test_run_grompp(params_dict):
             shutil.rmtree(f'{REXEE.working_dir}/sim_{i}')
 
     # Case 2: Other iterations, i.e., n != 0
-    # More to come ...
+    n = 1  # For swap_pattern, we stick with [1, 0, 2, 3]
+    REXEE = get_REXEE_instance(params_dict)
+    if rank == 0:
+        for i in range(params_dict['n_sim']):
+            os.makedirs(f'{REXEE.working_dir}/sim_{i}/iteration_{n}')
+            os.makedirs(f'{REXEE.working_dir}/sim_{i}/iteration_{n-1}')
+            shutil.copy(REXEE.mdp, f'{REXEE.working_dir}/sim_{i}/iteration_{n}/expanded.mdp')
+            shutil.copy(REXEE.gro, f'{REXEE.working_dir}/sim_{i}/iteration_{n-1}/confout.gro')
+
+    REXEE._run_grompp(n, swap_pattern)
+
+    # Check if the output files are generated, then clean up
+    if rank == 0:
+        for i in range(params_dict['n_sim']):
+            assert os.path.isfile(f'{REXEE.working_dir}/sim_{i}/iteration_1/sys_EE.tpr') is True
+            assert os.path.isfile(f'{REXEE.working_dir}/sim_{i}/iteration_1/mdout.mdp') is True
+
+            # Here we check if the command executed was what we expected
+            mdp = f'{REXEE.working_dir}/sim_{i}/iteration_1/expanded.mdp'
+            gro = f'{REXEE.working_dir}/sim_{swap_pattern[i]}/iteration_0/confout.gro'
+            top = params_dict['top']
+            tpr = f'{REXEE.working_dir}/sim_{i}/iteration_1/sys_EE.tpr'
+            mdout = f'{REXEE.working_dir}/sim_{i}/iteration_1/mdout.mdp'
+            cmd = f'{REXEE.gmx_executable} grompp -f {mdp} -c {gro} -p {top} -o {tpr} -po {mdout} -maxwarn 1'
+            assert get_gmx_cmd_from_output(mdout)[0] == cmd
+
+            shutil.rmtree(f'{REXEE.working_dir}/sim_{i}')

From 7e2fb7d5435f558bd3309041e417a307ac63e96b Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 26 Mar 2024 17:09:51 +0800
Subject: [PATCH 15/41] Added a test for compare_MDPs

---
 .circleci/config.yml                     |   7 +-
 ensemble_md/tests/data/mdp/compare_1.mdp | 122 ++++++++++++++++++++++
 ensemble_md/tests/data/mdp/compare_2.mdp | 122 ++++++++++++++++++++++
 ensemble_md/tests/data/mdp/compare_3.mdp | 123 +++++++++++++++++++++++
 ensemble_md/tests/test_gmx_parser.py     |  22 ++++
 ensemble_md/tests/test_mpi_func.py       |  11 +-
 6 files changed, 399 insertions(+), 8 deletions(-)
 create mode 100644 ensemble_md/tests/data/mdp/compare_1.mdp
 create mode 100644 ensemble_md/tests/data/mdp/compare_2.mdp
 create mode 100644 ensemble_md/tests/data/mdp/compare_3.mdp

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 249067ac..53621ef0 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -56,9 +56,10 @@ jobs:
             pip3 install pytest
             pip3 install pytest-mpi
             pip3 install pytest-cov
-            COVERAGE_FILE=.coverage_1 pytest -vv --disable-pytest-warnings --cov=ensemble_md --cov-report=xml --color=yes ensemble_md/tests/
-            COVERAGE_FILE=.coverage_2 mpirun -np 4 pytest -vv --disable-pytest-warnings --cov=ensemble_md --cov-report=xml --color=yes ensemble_md/tests/test_mpi_func.py --with-mpi
-            coverage combine .coverage_*
+            pytest -vv --disable-pytest-warnings --cov=ensemble_md --cov-report=xml --color=yes ensemble_md/tests/
+            # COVERAGE_FILE=.coverage_1 pytest -vv --disable-pytest-warnings --cov=ensemble_md --cov-report=xml --color=yes ensemble_md/tests/
+            # COVERAGE_FILE=.coverage_2 mpirun -np 4 pytest -vv --disable-pytest-warnings --cov=ensemble_md --cov-report=xml --color=yes ensemble_md/tests/test_mpi_func.py --with-mpi
+            # coverage combine .coverage_*
 
       - run:
           name: CodeCov
diff --git a/ensemble_md/tests/data/mdp/compare_1.mdp b/ensemble_md/tests/data/mdp/compare_1.mdp
new file mode 100644
index 00000000..bddd8923
--- /dev/null
+++ b/ensemble_md/tests/data/mdp/compare_1.mdp
@@ -0,0 +1,122 @@
+; Run control
+integrator = md-vv
+tinit = 0
+dt = 0.002
+nsteps = 100000000
+nstcomm = 10
+
+; Output control
+nstlog = 1000
+nstcalcenergy = 10
+nstenergy = 1000
+nstxout_compressed = 1000
+
+; Neighborsearching and short-range nonbonded interactions
+nstlist = 10
+ns_type = grid
+pbc = xyz
+rlist = 1.3
+
+; Electrostatics
+coulombtype = PME-switch
+rcoulomb_switch = 0.88
+rcoulomb = 0.9
+
+; van der Waals
+vdw_type = switch
+rvdw_switch = 0.85
+rvdw = 0.9
+
+; Apply long range dispersion corrections for Energy and Pressure
+DispCorr = AllEnerPres
+
+; Spacing for the PME/PPPM FFT grid
+fourierspacing = 0.12
+
+; EWALD/PME/PPPM parameters
+pme_order = 4
+ewald_rtol = 1e-05
+ewald_geometry = 3d
+epsilon_surface = 0
+optimize_fft = yes
+
+; Temperature coupling
+tcoupl = nose-hoover
+nsttcouple = 10
+tc_grps = System
+tau_t = 1.0
+ref_t = 300
+
+; Pressure coupling is on for NPT
+pcoupl = no
+
+gen_vel = yes
+gen_temp = 300
+gen_seed = -1
+
+; options for bonds
+constraints = h-bonds
+
+; Type of constraint algorithm
+constraint_algorithm = shake
+shake_tol = 1e-05
+
+; Free energy calculation
+free_energy = expanded
+calc_lambda_neighbors = -1
+sc_alpha = 0.5
+sc_power = 1
+sc_sigma = 0.5
+couple_moltype = MOL
+couple_lambda0 = vdw-q
+couple_lambda1 = none
+couple_intramol = no
+init_lambda_state = 0
+nstdhdl = 100
+dhdl_print_energy = total
+
+; Seed for Monte Carlo in lambda space
+symmetrized_transition_matrix = no
+nst_transition_matrix = 100000
+; wl-scale                 = 0.8
+; wl-ratio                 = 0.7
+; init-wl-delta            = 10
+
+; expanded ensemble variables
+nstexpanded = 100
+lmc_stats = no
+lmc_move = metropolized-gibbs
+; lmc-weights-equil       = wl-delta
+; weight-equil-wl-delta   = 0.001
+; wl-oneovert             = yes
+
+; lambda-states          = 1      2      3      4      5      6      7      8      9      10     11     12     13     14     15     16     17     18     19     20     21     22     23     24     25     26     27     28     29     30     31     32     33     34     35     36     37     38     39     40
+
+coul_lambdas = 0.0 0.05 0.1 0.15 0.2 0.25 0.3 0.35 0.4 0.45 0.5 0.55 0.6 0.65 0.7 0.75 0.8 0.85 0.9 0.95 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
+vdw_lambdas = 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.1 0.2 0.3 0.4 0.45 0.5 0.55 0.6 0.63 0.66 0.69 0.72 0.75 0.78 0.81 0.84 0.88 0.92 1.0
+
+; PULL CODE
+pull = yes
+pull_ngroups = 2
+pull_ncoords = 1
+pull_group1_name = HOS
+pull_group2_name = MOL
+pull_pbc_ref_prev_step_com = yes
+
+pull_coord1_groups = 1 2
+pull_coord1_type = umbrella
+pull_coord1_geometry = distance
+pull_coord1_dim = Y Y Y
+pull_coord1_origin = 0.0 0.0 0.0
+pull_coord1_vec = 0.0 0.0 0.0
+pull_coord1_start = yes
+pull_coord1_init = 0
+pull_coord1_rate = 0
+pull_coord1_k = 0
+pull_coord1_kB = 1000
+pull_nstfout = 400000
+pull_nstxout = 1000
+pull-print-ref-value = yes
+
+restraint_lambdas = 0.0 0.0 0.0 0.0 0.0 0.0 0.01 0.05 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 0.95 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
+init_lambda_weights = 0.0 57.88597 112.71883 163.84425 210.48097 253.80261 294.79849 333.90408 370.82669 406.02515 438.53116 468.53751 496.24649 521.58417 544.57404 565.26697 583.7337 599.60651 613.43958 624.70471 633.95947 638.29785 642.44977 646.33551 649.91626 651.54779 652.93359 654.13263 654.94073 655.13086 655.07239 654.66443 653.68683 652.32123 650.72308 649.2381 647.94586 646.599 645.52063 643.99133
diff --git a/ensemble_md/tests/data/mdp/compare_2.mdp b/ensemble_md/tests/data/mdp/compare_2.mdp
new file mode 100644
index 00000000..baa4e137
--- /dev/null
+++ b/ensemble_md/tests/data/mdp/compare_2.mdp
@@ -0,0 +1,122 @@
+; Run control
+integrator = md-vv
+tinit = 0
+dt = 0.002
+nsteps = 100000000
+nstcomm = 10
+
+; Output control
+nstlog = 1000
+nstcalcenergy = 10
+nstenergy = 1000
+nstxout_compressed = 1000
+
+; Neighborsearching and short-range nonbonded interactions
+nstlist = 10
+ns_type = grid
+pbc = xyz
+rlist = 1.3
+
+; Electrostatics
+coulombtype = PME-switch
+rcoulomb_switch = 0.88
+rcoulomb = 0.9
+
+; van der Waals
+vdw_type = switch
+rvdw_switch = 0.85
+rvdw = 0.9
+
+; Apply long range dispersion corrections for Energy and Pressure
+DispCorr = AllEnerPres
+
+; Spacing for the PME/PPPM FFT grid
+fourierspacing = 0.12
+
+; EWALD/PME/PPPM parameters
+pme_order = 4
+ewald_rtol = 1e-05
+ewald_geometry = 3d
+epsilon_surface = 0
+optimize_fft = yes
+
+; Temperature coupling
+tcoupl = nose-hoover
+nsttcouple = 10
+tc_grps = System
+tau_t = 1.0
+ref_t = 300
+
+; Pressure coupling is on for NPT
+pcoupl = no
+
+gen_vel = yes
+gen_temp = 300
+gen_seed = -1
+
+; options for bonds
+constraints = h-bonds
+
+; Type of constraint algorithm
+constraint_algorithm = shake
+shake_tol = 1e-05
+
+; Free energy calculation
+free_energy = expanded
+calc_lambda_neighbors = -1
+sc_alpha = 0.5
+sc_power = 1
+sc_sigma = 0.5
+couple_moltype = MOL
+couple_lambda0 = vdw-q
+couple_lambda1 = none
+couple_intramol = no
+init_lambda_state = 0
+nstdhdl = 100
+dhdl_print_energy = total
+
+; Seed for Monte Carlo in lambda space
+symmetrized_transition_matrix = no
+nst_transition_matrix = 100000
+; wl-scale                 = 0.8
+; wl-ratio                 = 0.7
+; init-wl-delta            = 10
+
+; expanded ensemble variables
+nstexpanded = 100
+lmc_stats = no
+lmc_move = metropolized-gibbs
+; lmc-weights-equil       = wl-delta
+; weight-equil-wl-delta   = 0.001
+; wl-oneovert             = yes
+
+; lambda-states          = 1      2      3      4      5      6      7      8      9      10     11     12     13     14     15     16     17     18     19     20     21     22     23     24     25     26     27     28     29     30     31     32     33     34     35     36     37     38     39     40
+
+coul_lambdas = 0.0 0.05 0.1 0.15 0.2 0.25 0.3 0.35 0.4 0.45 0.5 0.55 0.6 0.65 0.7 0.75 0.8 0.85 0.9 0.95 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
+vdw_lambdas = 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.1 0.2 0.3 0.4 0.45 0.5 0.55 0.6 0.63 0.66 0.69 0.72 0.75 0.78 0.81 0.84 0.88 0.92 1.0
+
+; PULL CODE
+pull = yes
+pull_ngroups = 2
+pull_ncoords = 1
+pull_group1_name = HOS
+pull_group2_name = MOL
+pull_pbc_ref_prev_step_com = yes
+
+pull_coord1_groups = 1 2
+pull_coord1_type = umbrella
+pull_coord1_geometry = distance
+pull_coord1_dim = Y Y Y
+pull_coord1_origin = 0.0 0.0 0.0
+pull_coord1_vec = 0.0 0.0 0.0
+pull_coord1_start = yes
+pull_coord1_init = 0
+pull_coord1_rate = 0
+pull_coord1_k = 0
+pull_coord1_kB = 1000
+pull_nstfout = 400000
+pull_nstxout = 1000
+pull_print_ref_value = yes
+
+restraint_lambdas = 0.0 0.0 0.0 0.0 0.0 0.0 0.01 0.05 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 0.95 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
+init_lambda_weights = 0.0 57.88597 112.71883 163.84425 210.48097 253.80261 294.79849 333.90408 370.82669 406.02515 438.53116 468.53751 496.24649 521.58417 544.57404 565.26697 583.7337 599.60651 613.43958 624.70471 633.95947 638.29785 642.44977 646.33551 649.91626 651.54779 652.93359 654.13263 654.94073 655.13086 655.07239 654.66443 653.68683 652.32123 650.72308 649.2381 647.94586 646.599 645.52063 643.99133
diff --git a/ensemble_md/tests/data/mdp/compare_3.mdp b/ensemble_md/tests/data/mdp/compare_3.mdp
new file mode 100644
index 00000000..4b36dabd
--- /dev/null
+++ b/ensemble_md/tests/data/mdp/compare_3.mdp
@@ -0,0 +1,123 @@
+; Run control
+integrator = md-vv
+tinit = 0
+dt = 0.002
+nsteps = 100000000
+nstcomm = 10
+
+; Output control
+nstlog = 1000
+nstcalcenergy = 10
+nstenergy = 1000
+nstxout_compressed = 1000
+
+; Neighborsearching and short-range nonbonded interactions
+nstlist = 10
+ns_type = grid
+pbc = xyz
+rlist = 1.3
+
+; Electrostatics
+coulombtype = PME-switch
+rcoulomb_switch = 0.88
+rcoulomb = 0.9
+
+; van der Waals
+vdw_type = switch
+rvdw_switch = 0.85
+rvdw = 0.9
+
+; Apply long range dispersion corrections for Energy and Pressure
+DispCorr = AllEnerPres
+
+; Spacing for the PME/PPPM FFT grid
+fourierspacing = 0.12
+
+; EWALD/PME/PPPM parameters
+pme_order = 4
+ewald_rtol = 1e-05
+ewald_geometry = 3d
+epsilon_surface = 0
+optimize_fft = yes
+
+; Temperature coupling
+tcoupl = nose-hoover
+nsttcouple = 10
+tc_grps = System
+tau_t = 1.0
+ref_t = 300
+
+; Pressure coupling is on for NPT
+pcoupl = no
+
+gen_vel = yes
+gen_temp = 300
+gen_seed = -1
+
+; options for bonds
+constraints = h-bonds
+
+; Type of constraint algorithm
+constraint_algorithm = shake
+shake_tol = 1e-05
+
+; Free energy calculation
+free_energy = expanded
+calc_lambda_neighbors = -1
+sc_alpha = 0.5
+sc_power = 1
+sc_sigma = 0.5
+couple_moltype = MOL
+couple_lambda0 = vdw-q
+couple_lambda1 = none
+couple_intramol = no
+init_lambda_state = 0
+nstdhdl = 10
+dhdl_print_energy = total
+
+; Seed for Monte Carlo in lambda space
+lmc_seed = -1
+symmetrized_transition_matrix = no
+nst_transition_matrix = 100000
+wl_scale = 0.8
+wl_ratio = 0.7
+init_wl_delta = 10
+
+; expanded ensemble variables
+nstexpanded = 10
+lmc_stats = wang-landau
+lmc_move = metropolized-gibbs
+lmc_weights_equil = wl-delta
+weight_equil_wl_delta = 0.001
+wl_oneovert = yes
+
+; lambda-states          = 1      2      3      4      5      6      7      8      9      10     11     12     13     14     15     16     17     18     19     20     21     22     23     24     25     26     27     28     29     30     31     32     33     34     35     36     37     38     39     40
+
+coul_lambdas = 0.0 0.05 0.1 0.15 0.2 0.25 0.3 0.35 0.4 0.45 0.5 0.55 0.6 0.65 0.7 0.75 0.8 0.85 0.9 0.95 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
+vdw_lambdas = 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.1 0.2 0.3 0.4 0.45 0.5 0.55 0.6 0.63 0.66 0.69 0.72 0.75 0.78 0.81 0.84 0.88 0.92 1.0
+
+; PULL CODE
+pull = yes
+pull_ngroups = 2
+pull_ncoords = 1
+pull_group1_name = HOS
+pull_group2_name = MOL
+pull_pbc_ref_prev_step_com = yes
+
+pull_coord1_groups = 1 2
+pull_coord1_type = umbrella
+pull_coord1_geometry = distance
+pull_coord1_dim = Y Y Y
+pull_coord1_origin = 0.0 0.0 0.0
+pull_coord1_vec = 0.0 0.0 0.0
+pull_coord1_start = yes
+pull_coord1_init = 0
+pull_coord1_rate = 0
+pull_coord1_k = 0
+pull_coord1_kB = 1000
+pull_nstfout = 400000
+pull_nstxout = 1000
+pull_print_ref_value = yes
+
+restraint_lambdas = 0.0 0.0 0.0 0.0 0.0 0.0 0.01 0.05 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 0.95 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
+
diff --git a/ensemble_md/tests/test_gmx_parser.py b/ensemble_md/tests/test_gmx_parser.py
index dcb5f35c..0caef7ee 100644
--- a/ensemble_md/tests/test_gmx_parser.py
+++ b/ensemble_md/tests/test_gmx_parser.py
@@ -98,3 +98,25 @@ def test_write(self):
         mdp.write('test_2.mdp', skipempty=True)
         os.remove('test_1.mdp')
         os.remove('test_2.mdp')
+
+def test_compare_MDPs():
+    mdp_list = ['ensemble_md/tests/data/mdp/compare_1.mdp', 'ensemble_md/tests/data/mdp/compare_2.mdp', 'ensemble_md/tests/data/mdp/compare_3.mdp']
+    result_1 = gmx_parser.compare_MDPs(mdp_list[:2], print_diff=True)
+    result_2 = gmx_parser.compare_MDPs(mdp_list[1:], print_diff=True)
+    dict_1 = {}  # the first two are the same but just in different formats
+    dict_2 = {
+        'nstdhdl': [100, 10],
+        'wl_oneovert': [None, 'yes'],
+        'weight_equil_wl_delta': [None, 0.001],
+        'init_lambda_weights': [[0.0, 57.88597, 112.71883, 163.84425, 210.48097, 253.80261, 294.79849, 333.90408, 370.82669, 406.02515, 438.53116, 468.53751, 496.24649, 521.58417, 544.57404, 565.26697, 583.7337, 599.60651, 613.43958, 624.70471, 633.95947, 638.29785, 642.44977, 646.33551, 649.91626, 651.54779, 652.93359, 654.13263, 654.94073, 655.13086, 655.07239, 654.66443, 653.68683, 652.32123, 650.72308, 649.2381, 647.94586, 646.599, 645.52063, 643.99133], None],
+        'wl_ratio': [None, 0.7],
+        'lmc_weights_equil': [None, 'wl_delta'],
+        'lmc_stats': ['no', 'wang_landau'],
+        'wl_scale': [None, 0.8],
+        'init_wl_delta': [None, 10],
+        'lmc_seed': [None, -1],
+        'nstexpanded': [100, 10]
+    }
+
+    assert result_1 == dict_1
+    assert result_2 == dict_2
diff --git a/ensemble_md/tests/test_mpi_func.py b/ensemble_md/tests/test_mpi_func.py
index 889dd22e..07557847 100644
--- a/ensemble_md/tests/test_mpi_func.py
+++ b/ensemble_md/tests/test_mpi_func.py
@@ -14,12 +14,8 @@
 import yaml
 import shutil
 import pytest
-from mpi4py import MPI
 from ensemble_md.replica_exchange_EE import ReplicaExchangeEE
 
-comm = MPI.COMM_WORLD
-rank = comm.Get_rank()
-
 
 @pytest.fixture
 def params_dict():
@@ -30,7 +26,7 @@ def params_dict():
         'gmx_executable': 'gmx',
         'gro': 'ensemble_md/tests/data/sys.gro',
         'top': 'ensemble_md/tests/data/sys.top',
-        'mdp': 'ensemble_md/tests/data/expanded.mdp',
+        'mdp': 'ensemble_md/tests/data/mdp/expanded.mdp',
         'n_sim': 4,
         'n_iter': 10,
         's': 1,
@@ -50,6 +46,7 @@ def get_REXEE_instance(input_dict, yml_file='params.yaml'):
     with open(yml_file, 'w') as f:
         yaml.dump(input_dict, f)
     REXEE = ReplicaExchangeEE(yml_file)
+    print(os.path.abspath(yml_file))
     return REXEE
 
 
@@ -97,6 +94,10 @@ def get_gmx_cmd_from_output(output):
 
 @pytest.mark.mpi
 def test_run_grompp(params_dict):
+    from mpi4py import MPI
+    comm = MPI.COMM_WORLD
+    rank = comm.Get_rank()
+
     params_dict['grompp_args'] = {'-maxwarn': '1'}
 
     # Case 1: The first iteration, i.e., n = 0

From 194235c5f704c89a26f59d06c3cb3fb26c64a87b Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 26 Mar 2024 19:17:45 +0800
Subject: [PATCH 16/41] Added unit tests for utils.py and tweaked utils.py

---
 ensemble_md/tests/test_gmx_parser.py |  5 +-
 ensemble_md/tests/test_utils.py      | 97 ++++++++++++++++++++++++++++
 ensemble_md/utils/utils.py           | 20 ++++--
 3 files changed, 116 insertions(+), 6 deletions(-)

diff --git a/ensemble_md/tests/test_gmx_parser.py b/ensemble_md/tests/test_gmx_parser.py
index 0caef7ee..1a929645 100644
--- a/ensemble_md/tests/test_gmx_parser.py
+++ b/ensemble_md/tests/test_gmx_parser.py
@@ -99,8 +99,9 @@ def test_write(self):
         os.remove('test_1.mdp')
         os.remove('test_2.mdp')
 
+
 def test_compare_MDPs():
-    mdp_list = ['ensemble_md/tests/data/mdp/compare_1.mdp', 'ensemble_md/tests/data/mdp/compare_2.mdp', 'ensemble_md/tests/data/mdp/compare_3.mdp']
+    mdp_list = ['ensemble_md/tests/data/mdp/compare_1.mdp', 'ensemble_md/tests/data/mdp/compare_2.mdp', 'ensemble_md/tests/data/mdp/compare_3.mdp']  # noqa: E501
     result_1 = gmx_parser.compare_MDPs(mdp_list[:2], print_diff=True)
     result_2 = gmx_parser.compare_MDPs(mdp_list[1:], print_diff=True)
     dict_1 = {}  # the first two are the same but just in different formats
@@ -108,7 +109,7 @@ def test_compare_MDPs():
         'nstdhdl': [100, 10],
         'wl_oneovert': [None, 'yes'],
         'weight_equil_wl_delta': [None, 0.001],
-        'init_lambda_weights': [[0.0, 57.88597, 112.71883, 163.84425, 210.48097, 253.80261, 294.79849, 333.90408, 370.82669, 406.02515, 438.53116, 468.53751, 496.24649, 521.58417, 544.57404, 565.26697, 583.7337, 599.60651, 613.43958, 624.70471, 633.95947, 638.29785, 642.44977, 646.33551, 649.91626, 651.54779, 652.93359, 654.13263, 654.94073, 655.13086, 655.07239, 654.66443, 653.68683, 652.32123, 650.72308, 649.2381, 647.94586, 646.599, 645.52063, 643.99133], None],
+        'init_lambda_weights': [[0.0, 57.88597, 112.71883, 163.84425, 210.48097, 253.80261, 294.79849, 333.90408, 370.82669, 406.02515, 438.53116, 468.53751, 496.24649, 521.58417, 544.57404, 565.26697, 583.7337, 599.60651, 613.43958, 624.70471, 633.95947, 638.29785, 642.44977, 646.33551, 649.91626, 651.54779, 652.93359, 654.13263, 654.94073, 655.13086, 655.07239, 654.66443, 653.68683, 652.32123, 650.72308, 649.2381, 647.94586, 646.599, 645.52063, 643.99133], None],  # noqa: E501
         'wl_ratio': [None, 0.7],
         'lmc_weights_equil': [None, 'wl_delta'],
         'lmc_stats': ['no', 'wang_landau'],
diff --git a/ensemble_md/tests/test_utils.py b/ensemble_md/tests/test_utils.py
index cdb13c56..495fc68d 100644
--- a/ensemble_md/tests/test_utils.py
+++ b/ensemble_md/tests/test_utils.py
@@ -10,10 +10,15 @@
 """
 Unit tests for the module utils.py.
 """
+import os
 import sys
+import shutil
+import pytest
 import tempfile
+import subprocess
 import numpy as np
 from ensemble_md.utils import utils
+from unittest.mock import patch, MagicMock
 
 
 def test_logger():
@@ -39,6 +44,37 @@ def test_logger():
         sys.stdout = sys.__stdout__
 
 
+def test_run_gmx_cmd_success():
+    # Mock the subprocess.run return value for a successful execution
+    mock_successful_return = MagicMock()
+    mock_successful_return.returncode = 0
+    mock_successful_return.stdout = "Simulation complete"
+    mock_successful_return.stderr = None
+
+    with patch('subprocess.run', return_value=mock_successful_return) as mock_run:
+        return_code, stdout, stderr = utils.run_gmx_cmd(['gmx', 'mdrun', '-deffnm', 'sys'])
+
+    mock_run.assert_called_once_with(['gmx', 'mdrun', '-deffnm', 'sys'], capture_output=True, text=True, input=None, check=True)  # noqa: E501
+    assert return_code == 0
+    assert stdout == "Simulation complete"
+    assert stderr is None
+
+
+def test_run_gmx_cmd_failure():
+    # Mock the subprocess.run to raise a CalledProcessError for a failed execution
+    mock_failed_return = MagicMock()
+    mock_failed_return.returncode = 1
+    mock_failed_return.stderr = "Error encountered"
+
+    with patch('subprocess.run') as mock_run:
+        mock_run.side_effect = [subprocess.CalledProcessError(mock_failed_return.returncode, 'cmd', stderr=mock_failed_return.stderr)]  # noqa: E501
+        return_code, stdout, stderr = utils.run_gmx_cmd(['gmx', 'mdrun', '-deffnm', 'sys'])
+
+    assert return_code == 1
+    assert stdout is None
+    assert stderr == "Error encountered"
+
+
 def test_format_time():
     assert utils.format_time(0) == "0.0 second(s)"
     assert utils.format_time(1) == "1.0 second(s)"
@@ -96,3 +132,64 @@ def test_weighted_mean():
     mean, err = utils.weighted_mean(vals, errs)
     assert np.isclose(mean, 2.9997333688841485)
     assert np.isclose(err, 0.0577311783020254)
+
+    # 3. 0 in errs
+    vals = [1, 2, 3, 4]
+    errs = [0, 0.1, 0.1, 0.1]
+    mean, err = utils.weighted_mean(vals, errs)
+    assert mean == 2.5
+    assert err is None
+
+
+def test_calc_rmse():
+    # Test 1
+    data = [1, 2, 3, 4, 5]
+    ref = [2, 4, 6, 8, 10]
+    expected_rmse = np.sqrt(np.mean((np.array(data) - np.array(ref)) ** 2))
+    assert utils.calc_rmse(data, ref) == expected_rmse
+
+    # Test 2
+    ref = [1, 2, 3, 4, 5]
+    expected_rmse = 0
+    assert utils.calc_rmse(data, ref) == expected_rmse
+
+    # Test 3
+    data = [1, 2, 3]
+    ref = [1, 2]
+    with pytest.raises(ValueError):
+        utils.calc_rmse(data, ref)
+
+
+def test_get_time_metrics():
+    log = 'ensemble_md/tests/data/log/EXE.log'
+    t_metrics = {
+        'performance': 23.267,
+        't_wall': 3.721,
+        't_core': 29.713
+    }
+    assert utils.get_time_metrics(log) == t_metrics
+
+
+def test_analyze_REXEE_time():
+    # Set up directories and files
+    dirs = [f'ensemble_md/tests/data/log/sim_{i}/iteration_{j}' for i in range(2) for j in range(2)]
+    files = [f'ensemble_md/tests/data/log/EXE_{i}.log' for i in range(4)]
+    for i in range(4):
+        os.makedirs(dirs[i])
+        shutil.copy(files[i], os.path.join(dirs[i], 'EXE.log'))
+
+    # Test analyze_REXEE_time
+    # Case 1: Wrong paths
+    with pytest.raises(FileNotFoundError, match="No sim/iteration directories found."):
+        t_1, t_2, t_3 = utils.analyze_REXEE_time()  # This will try to find files from [natsort.natsorted(glob.glob(f'sim_*/iteration_{i}/*log')) for i in range(n_iter)]  # noqa: E501
+
+    # Case 2: Correct paths
+    log_files = [[f'ensemble_md/tests/data/log/sim_{i}/iteration_{j}/EXE.log' for i in range(2)] for j in range(2)]
+    t_1, t_2, t_3 = utils.analyze_REXEE_time(log_files=log_files)
+    assert t_1 == 2.125
+    assert np.isclose(t_2, 0.175)
+    assert t_3 == [[1.067, 0.94], [1.01, 1.058]]
+
+    # Clean up
+    for i in range(2):
+        shutil.rmtree(f'ensemble_md/tests/data/log/sim_{i}')
diff --git a/ensemble_md/utils/utils.py b/ensemble_md/utils/utils.py
index c595c403..e84878ff 100644
--- a/ensemble_md/utils/utils.py
+++ b/ensemble_md/utils/utils.py
@@ -284,19 +284,23 @@ def get_time_metrics(log):
         if 'Time: ' in l:
             t_metrics['t_core'] = float(l.split()[1])  # s
             t_metrics['t_wall'] = float(l.split()[2])  # s
-            break
 
     return t_metrics
 
 
-def analyze_REXEE_time(log_files=None):
+def analyze_REXEE_time(n_iter=None, log_files=None):
     """
     Perform simple data analysis on the wall times and performances of all iterations of an REXEE simulation.
 
     Parameters
     ----------
+    n_iter : None or int
+        The number of iterations in the REXEE simulation. If None, it is set to the length of :code:`log_files`
+        when :code:`log_files` is given; otherwise, it is determined by counting the directories named
+        "iteration_*" in the simulation directory :code:`sim_0` in the current working directory.
     log_files : None or list
-        A list of sorted file names of all log files.
+        A list of lists of log files with the shape of (n_iter, n_replicas). If None, the function will try to find
+        the log files by searching the current working directory.
 
     Returns
     -------
@@ -308,10 +312,18 @@ def analyze_REXEE_time(log_files=None):
     t_wall_list : list
         The list of wall times of finishing each mdrun command.
     """
-    n_iter = len(glob.glob('sim_0/iteration_*'))
+    if n_iter is None:
+        if log_files is None:
+            n_iter = len(glob.glob('sim_0/iteration_*'))
+        else:
+            n_iter = len(log_files)
+
     if log_files is None:
         log_files = [natsort.natsorted(glob.glob(f'sim_*/iteration_{i}/*log')) for i in range(n_iter)]
 
+    if len(log_files) == 0:
+        raise FileNotFoundError("No sim/iteration directories found.")
+
     t_wall_list = []
     t_wall_tot, t_sync = 0, 0
     for i in range(n_iter):

From 60daa8041bf02d82f4f9dc31434353b5a2e95e09 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Wed, 27 Mar 2024 00:17:12 +0800
Subject: [PATCH 17/41] Added test_clustering.py

---
 ensemble_md/analysis/analyze_traj.py   |  2 +-
 ensemble_md/tests/test_analyze_traj.py | 44 ++++++++++++++++++++++++++
 ensemble_md/tests/test_clustering.py   | 28 ++++++++++++++++
 3 files changed, 73 insertions(+), 1 deletion(-)
 create mode 100644 ensemble_md/tests/test_clustering.py

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index e80588bc..07934d90 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -557,7 +557,7 @@ def plot_state_hist(trajs, state_ranges, fig_name, stack=True, figsize=None, pre
     return hist_data
 
 
-def calculate_hist_rmse(hist_data, state_ranges):
+def calc_hist_rmse(hist_data, state_ranges):
     """
     Calculates the RMSE of accumulated histogram counts of the state index. The reference
     is determined by assuming all alchemical states have equal chances to be visited, i.e.
diff --git a/ensemble_md/tests/test_analyze_traj.py b/ensemble_md/tests/test_analyze_traj.py
index 1b276461..edc5a6b9 100644
--- a/ensemble_md/tests/test_analyze_traj.py
+++ b/ensemble_md/tests/test_analyze_traj.py
@@ -31,6 +31,18 @@ def test_extract_state_traj():
     assert np.allclose(t, t_true)
 
 
+def test_stitch_time_series():
+    pass
+
+
+def test_convert_npy2xvg():
+    pass
+
+
+def test_stitch_time_series_for_sim():
+    pass
+
+
 def test_stitch_trajs():
     pass
 
@@ -51,5 +63,37 @@ def test_plot_state_hist():
     pass
 
 
+def test_calculate_hist_rmse():
+    pass
+
+
 def plot_transit_time():
     pass
+
+
+def test_plot_g_vecs():
+    pass
+
+
+def test_get_swaps():
+    pass
+
+
+def test_plot_swaps():
+    pass
+
+
+def test_get_g_evolution():
+    pass
+
+
+def test_get_dg_evoluation():
+    pass
+
+
+def test_plot_dg_evolution():
+    pass
+
+
+def test_get_delta_w_updates():
+    pass
diff --git a/ensemble_md/tests/test_clustering.py b/ensemble_md/tests/test_clustering.py
new file mode 100644
index 00000000..5e8825f2
--- /dev/null
+++ b/ensemble_md/tests/test_clustering.py
@@ -0,0 +1,28 @@
+####################################################################
+#                                                                  #
+#    ensemble_md,                                                  #
+#    a python package for running GROMACS simulation ensembles     #
+#                                                                  #
+#    Written by Wei-Tse Hsu <wehs7661@colorado.edu>                #
+#    Copyright (c) 2022 University of Colorado Boulder             #
+#                                                                  #
+####################################################################
+"""
+Unit tests for the module clustering.py.
+"""
+
+
+def test_cluster_traj():
+    pass
+
+
+def test_get_cluster_info():
+    pass
+
+
+def test_get_cluster_members():
+    pass
+
+
+def test_count_transitions():
+    pass

From 6d10c6d6760db58b9a9dbab42a5ddc96ce63fabb Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Wed, 27 Mar 2024 03:20:15 +0800
Subject: [PATCH 18/41] Added tests for traj2transmtx and plot_rep_trajs

---
 ensemble_md/tests/test_analyze_traj.py | 153 ++++++++++++++++++++++++-
 1 file changed, 147 insertions(+), 6 deletions(-)

diff --git a/ensemble_md/tests/test_analyze_traj.py b/ensemble_md/tests/test_analyze_traj.py
index edc5a6b9..9bde6618 100644
--- a/ensemble_md/tests/test_analyze_traj.py
+++ b/ensemble_md/tests/test_analyze_traj.py
@@ -12,6 +12,7 @@
 """
 import os
 import numpy as np
+from unittest.mock import patch
 from ensemble_md.analysis import analyze_traj
 
 current_path = os.path.dirname(os.path.abspath(__file__))
@@ -36,7 +37,34 @@ def test_stitch_time_series():
 
 
 def test_convert_npy2xvg():
-    pass
+    # Create dummy input data
+    trajs = np.array([[0, 1, 2, 3], [4, 5, 6, 7]], dtype=int)
+    dt = 0.1  # ps
+    subsampling = 2
+
+    os.chdir('ensemble_md/tests/data')
+    analyze_traj.convert_npy2xvg(trajs, dt, subsampling)
+
+    assert os.path.exists('traj_0.xvg')
+    assert os.path.exists('traj_1.xvg')
+
+    with open('traj_0.xvg', 'r') as f:
+        content = f.readlines()
+        assert content[0] == '# This file was created by ensemble_md\n'
+        assert content[1] == '# Time (ps) v.s. State index\n'
+        assert content[2] == '0.0         0\n'
+        assert content[3] == '0.2         2\n'
+
+    with open('traj_1.xvg', 'r') as f:
+        content = f.readlines()
+        assert content[0] == '# This file was created by ensemble_md\n'
+        assert content[1] == '# Time (ps) v.s. State index\n'
+        assert content[2] == '0.0         4\n'
+        assert content[3] == '0.2         6\n'
+
+    os.remove('traj_0.xvg')
+    os.remove('traj_1.xvg')
+    os.chdir('../../../')
 
 
 def test_stitch_time_series_for_sim():
@@ -48,11 +76,124 @@ def test_stitch_trajs():
 
 
 def test_traj2transmtx():
-    pass
-
-
-def test_plot_rep_trajs():
-    pass
+    traj = [0, 1, 2, 1, 0, 3]
+    N = 4  # matrix size
+
+    # Case 1: normalize=False
+    array = np.array([
+        [0, 1, 0, 1],
+        [1, 0, 1, 0],
+        [0, 1, 0, 0],
+        [0, 0, 0, 0]])
+    np.testing.assert_array_equal(analyze_traj.traj2transmtx(traj, N, normalize=False), array)
+
+    # Case 2: normalize=True
+    array = np.array([
+        [0, 0.5, 0, 0.5],
+        [0.5, 0, 0.5, 0],
+        [0, 1, 0, 0],
+        [0, 0, 0, 0]])
+    np.testing.assert_array_equal(analyze_traj.traj2transmtx(traj, N, normalize=True), array)
+
+
+@patch('ensemble_md.analysis.analyze_traj.plt')  # to mock and replace plt (which is matplotlib.pyplot) in analyze_traj
+def test_plot_rep_trajs(mock_plt):
+    # Not much we can do to test the plot function except to verify if the expected matplotlib functions were called.
+    fig_name = 'ensemble_md/tests/data/test.png'
+    cmap = mock_plt.cm.ocean
+
+    # Case 1: Short trajs without dt and stride
+    trajs = np.array([[0, 1, 1, 0, 2], [1, 0, 1, 2, 0], [2, 0, 1, 0, 2]], dtype=int)
+    n_sim = len(trajs)
+    colors = [cmap(i) for i in np.arange(n_sim) / n_sim]
+
+    analyze_traj.plot_rep_trajs(trajs, fig_name)
+
+    x_input = np.array([0, 1, 2, 3, 4])
+    y_input_1 = np.array([0, 1, 1, 0, 2])
+    y_input_2 = np.array([1, 0, 1, 2, 0])
+    y_input_3 = np.array([2, 0, 1, 0, 2])
+
+    # Verify that the expected matplotlib functions were called
+    mock_plt.figure.assert_called_once()
+    mock_plt.plot.assert_called()
+    mock_plt.xlabel.assert_called_with('MC moves')
+    mock_plt.ylabel.assert_called_with('Replica')
+    mock_plt.grid.assert_called_once()
+    mock_plt.legend.assert_called_once()
+    mock_plt.savefig.assert_called_once_with(fig_name, dpi=600)
+    assert mock_plt.plot.call_count == len(trajs)
+
+    # mock_plt.plot.assert_any_call(x_input, y_input_1, color=colors[0], label='Trajectory 0')
+    # There is a bug in unittest.mock such that there will be a ValueError upon comparison of two arrays.
+    # Therefore, the line above would fail and we can only use assert_called (as shown above) and compare
+    # x_input, y_input_1, etc. with the called arguments.
+    # Here mock_plt.plot.call_args_list prints as below. Note that a `call` object can be indexed like a tuple
+    # where index 0 contains the positional arguments, and index 1 contains the keyword arguments.
+    # [
+    #     call(array([0, 1, 2, 3, 4]), array([0, 1, 1, 0, 2]), color=<MagicMock name='plt.cm.ocean()' id='140575569521536'>, label='Trajectory 0', marker='o'),  # noqa: E501
+    #     call(array([0, 1, 2, 3, 4]), array([1, 0, 1, 2, 0]), color=<MagicMock name='plt.cm.ocean()' id='140575569521536'>, label='Trajectory 1', marker='o'),  # noqa: E501
+    #     call(array([0, 1, 2, 3, 4]), array([2, 0, 1, 0, 2]), color=<MagicMock name='plt.cm.ocean()' id='140575569521536'>, label='Trajectory 2', marker='o')   # noqa: E501
+    # ]
+
+    assert all(np.array_equal(a, b) for a, b in zip(mock_plt.plot.call_args_list[0][0], (x_input, y_input_1)))
+    assert all(np.array_equal(a, b) for a, b in zip(mock_plt.plot.call_args_list[1][0], (x_input, y_input_2)))
+    assert all(np.array_equal(a, b) for a, b in zip(mock_plt.plot.call_args_list[2][0], (x_input, y_input_3)))
+    assert mock_plt.plot.call_args_list[0][1] == {'color': colors[0], 'label': 'Trajectory 0', 'marker': 'o'}
+    assert mock_plt.plot.call_args_list[1][1] == {'color': colors[1], 'label': 'Trajectory 1', 'marker': 'o'}
+    assert mock_plt.plot.call_args_list[2][1] == {'color': colors[2], 'label': 'Trajectory 2', 'marker': 'o'}
+
+    # Case 2: Short trajs with dt and stride
+    mock_plt.reset_mock()
+
+    dt = 0.2  # ps
+    stride = 2
+    analyze_traj.plot_rep_trajs(trajs, fig_name, dt, stride)
+    x_input = np.array([0, 0.4, 0.8])
+    y_input_1 = np.array([0, 1, 2])
+    y_input_2 = np.array([1, 1, 0])
+    y_input_3 = np.array([2, 1, 2])
+
+    mock_plt.figure.assert_called_once()
+    mock_plt.plot.assert_called()
+    mock_plt.xlabel.assert_called_with('Time (ps)')
+    mock_plt.ylabel.assert_called_with('Replica')
+    mock_plt.grid.assert_called_once()
+    mock_plt.legend.assert_called_once()
+    mock_plt.savefig.assert_called_once_with(fig_name, dpi=600)
+    assert mock_plt.plot.call_count == len(trajs)
+
+    assert all(np.array_equal(a, b) for a, b in zip(mock_plt.plot.call_args_list[0][0], (x_input, y_input_1)))
+    assert all(np.array_equal(a, b) for a, b in zip(mock_plt.plot.call_args_list[1][0], (x_input, y_input_2)))
+    assert all(np.array_equal(a, b) for a, b in zip(mock_plt.plot.call_args_list[2][0], (x_input, y_input_3)))
+    assert mock_plt.plot.call_args_list[0][1] == {'color': colors[0], 'label': 'Trajectory 0', 'marker': 'o'}
+    assert mock_plt.plot.call_args_list[1][1] == {'color': colors[1], 'label': 'Trajectory 1', 'marker': 'o'}
+    assert mock_plt.plot.call_args_list[2][1] == {'color': colors[2], 'label': 'Trajectory 2', 'marker': 'o'}
+
+    # Case 3: Long trajs with dt and without stride
+    mock_plt.reset_mock()
+
+    trajs = np.random.randint(low=0, high=2, size=(3, 2000000))
+    analyze_traj.plot_rep_trajs(trajs, fig_name, dt)
+    mock_plt.figure.assert_called_once()
+    mock_plt.plot.assert_called()
+    mock_plt.xlabel.assert_called_with('Time (ns)')
+    mock_plt.ylabel.assert_called_with('Replica')
+    mock_plt.grid.assert_called_once()
+    mock_plt.legend.assert_called_once()
+    mock_plt.savefig.assert_called_once_with(fig_name, dpi=600)
+    assert mock_plt.plot.call_count == len(trajs)
+
+    # Here we only check the lengths of x and y inputs
+    assert len(mock_plt.plot.call_args_list[0][0][0]) == 2000000 / 100
+    assert len(mock_plt.plot.call_args_list[0][0][1]) == 2000000 / 100
+    assert len(mock_plt.plot.call_args_list[1][0][0]) == 2000000 / 100
+    assert len(mock_plt.plot.call_args_list[1][0][1]) == 2000000 / 100
+    assert len(mock_plt.plot.call_args_list[2][0][0]) == 2000000 / 100
+    assert len(mock_plt.plot.call_args_list[2][0][1]) == 2000000 / 100
+    assert mock_plt.plot.call_args_list[0][1] == {'color': colors[0], 'label': 'Trajectory 0'}
+    assert mock_plt.plot.call_args_list[1][1] == {'color': colors[1], 'label': 'Trajectory 1'}
+    assert mock_plt.plot.call_args_list[2][1] == {'color': colors[2], 'label': 'Trajectory 2'}
 
 
 def test_plot_state_trajs():

From b55791946930dce06a0a7bee286c02358b07eed0 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Thu, 28 Mar 2024 18:44:47 +0800
Subject: [PATCH 19/41] Some attempts of using assert_plt_calls to test
 multiple calls

---
 ensemble_md/tests/test_analyze_traj.py | 95 +++++++++++++++++++++++---
 1 file changed, 85 insertions(+), 10 deletions(-)

diff --git a/ensemble_md/tests/test_analyze_traj.py b/ensemble_md/tests/test_analyze_traj.py
index 9bde6618..f35a3a65 100644
--- a/ensemble_md/tests/test_analyze_traj.py
+++ b/ensemble_md/tests/test_analyze_traj.py
@@ -12,13 +12,39 @@
 """
 import os
 import numpy as np
-from unittest.mock import patch
+from unittest.mock import patch, MagicMock
 from ensemble_md.analysis import analyze_traj
 
 current_path = os.path.dirname(os.path.abspath(__file__))
 input_path = os.path.join(current_path, "data")
 
 
+def assert_plt_calls(mock_plt, call_specs):
+    """
+    Assert calls to matplotlib functions with specified parameters.
+
+    Parameters
+    ----------
+    mock_plt : :code:`MagicMock` object
+        Mock object of :code:`matplotlib.pyplot`.
+    call_specs : list
+        A list of lists that contains the following four elements:
+          - The name of the matplotlib function (as :code:`str`) that was called.
+          - The assert method (as :code:`str`), e.g., :code:`assert_called_once_with`.
+          - The positional arguments (as :code:`tuple`) passed to the matplotlib function.
+          - The keyword arguments (as :code:`dict`) passed to the matplotlib function.
+    """
+    for call_spec in call_specs:
+        plt_func = call_spec[0]
+        assert_method = call_spec[1]
+        plt_args = call_spec[2]
+        plt_kwargs = call_spec[3]
+
+        mock_func = getattr(mock_plt, plt_func)
+        assert_func = getattr(mock_func, assert_method)
+        assert_func(*plt_args, **plt_kwargs)  # call the assertion method
+
+
 def test_extract_state_traj():
     traj, t = analyze_traj.extract_state_traj(os.path.join(input_path, 'dhdl/dhdl_0.xvg'))
     state_list = [
@@ -115,13 +141,28 @@ def test_plot_rep_trajs(mock_plt):
     y_input_3 = np.array([2, 0, 1, 0, 2])
 
     # Verify that the expected matplotlib functions were called
-    mock_plt.figure.assert_called_once()
-    mock_plt.plot.assert_called()
-    mock_plt.xlabel.assert_called_with('MC moves')
-    mock_plt.ylabel.assert_called_with('Replica')
-    mock_plt.grid.assert_called_once()
-    mock_plt.legend.assert_called_once()
-    mock_plt.savefig.assert_called_once_with(fig_name, dpi=600)
+    # mock_plt.figure.assert_called_once()
+    # mock_plt.plot.assert_called()
+    # mock_plt.xlabel.assert_called_with('MC moves')
+    # mock_plt.ylabel.assert_called_with('Replica')
+    # mock_plt.grid.assert_called_once()
+    # mock_plt.legend.assert_called_once()
+    # mock_plt.savefig.assert_called_once_with(fig_name, dpi=600)
+    
+    # Using assert_plt_calls, the lines above can be written as below
+    call_specs = [
+        ['figure', 'assert_called_once', (), {}],
+        ['plot', 'assert_called', (), {}],
+        ['xlabel', 'assert_called_with', ('MC moves',), {}],
+        ['ylabel', 'assert_called_with', ('Replica',), {}],
+        ['grid', 'assert_called_once', (), {}],
+        ['legend', 'assert_called_once', (), {}],
+        ['savefig', 'assert_called_once_with', (fig_name,), {'dpi': 600}]
+    ]
+    assert_plt_calls(mock_plt, call_specs)
+    
+
+
     assert mock_plt.plot.call_count == len(trajs)
 
     # mock_plt.plot.assert_any_call(x_input, y_input_1, color=colors[0], label='Trajectory 0')
@@ -196,8 +237,42 @@ def test_plot_rep_trajs(mock_plt):
     assert mock_plt.plot.call_args_list[2][1] == {'color': colors[2], 'label': 'Trajectory 2'}
 
 
-def test_plot_state_trajs():
-    pass
+@patch('ensemble_md.analysis.analyze_traj.plt')
+def test_plot_state_trajs(mock_plt):
+    state_ranges = [[0, 1, 2, 3], [2, 3, 4, 5]]
+    fig_name = 'ensemble_md/tests/data/test.png'
+    cmap = mock_plt.cm.ocean
+    n_sim = len(state_ranges)
+    colors = [cmap(i) for i in np.arange(n_sim) / n_sim]
+
+    # Mock the return value of plt.subplots to return a tuple of two mock objects
+    # We need this because plot_state_trajs calls _, ax = plt.subplots(...). When we mock 
+    # matplolib.pyplot using mock_plt, plt.subplots will be replaced by mock_plt.subplots
+    # and will return a mock object, not the tuple of figure and axes objects that the real plt.subplots returns.
+    # This would in turn lead to an ValueError. To avoid this, we need to mock the return values of plt.subplots.
+    mock_figure = MagicMock()
+    mock_axes = MagicMock()
+    mock_plt.subplots.return_value = (mock_figure, mock_axes)
+
+    # Case 1: Short trajs without dt and stride
+    trajs = np.array([[0, 1, 0, 2, 3, 4, 3, 4, 5, 4], [2, 3, 4, 5, 4, 3, 2, 1, 0, 1]], dtype=int)
+
+    analyze_traj.plot_state_trajs(trajs, state_ranges, fig_name)
+
+    x_input = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+    
+    mock_plt.subplots.assert_called_once_with(nrows=1, ncols=2, figsize=(5, 2.5))
+    mock_plt.subplot.assert_called()
+    mock_plt.plot.assert_called()
+    mock_plt.fill_between.assert_called()
+    mock_plt.xlabel.assert_called_with('MC moves')
+    mock_plt.ylabel.assert_called_with('State')
+    mock_plt.grid.assert_called()
+
+    assert mock_plt.subplot.call_count == len(state_ranges)
+    assert mock_plt.plot.call_count == len(state_ranges)
+    assert mock_plt.grid.call_count == len(state_ranges)
+    assert mock_plt.fill_between.call_count == len(state_ranges) ** 2
 
 
 def test_plot_state_hist():

From 3c7d2df25179d1ddceb7caa7e42230b22a42f9c3 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Fri, 29 Mar 2024 00:08:41 +0800
Subject: [PATCH 20/41] Tweaked stitch_time_series_for_sim to check continuity
 of trajectories to be stitched.

---
 ensemble_md/analysis/analyze_traj.py | 70 +++++++++++++++-------------
 1 file changed, 38 insertions(+), 32 deletions(-)

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index 07934d90..d7ce436a 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -103,7 +103,7 @@ def stitch_time_series(files, rep_trajs, shifts=None, dhdl=True, col_idx=-1, sav
                     t = np.loadtxt(files_sorted[i][j], comments=['#', '@'])[:, 0]  # only used if save_xvg is True
             else:
                 # Starting from the 2nd iteration, we get rid of the first time frame the first
-                # frame of iteration n+1 the is the same as the last frame of iteration n
+                # frame of iteration n+1 is the same as the last frame of iteration n
                 if dhdl:
                     traj, t = extract_state_traj(files_sorted[i][j])
                     traj, t = traj[1:], t[1:]
@@ -124,39 +124,12 @@ def stitch_time_series(files, rep_trajs, shifts=None, dhdl=True, col_idx=-1, sav
     return trajs
 
 
-def convert_npy2xvg(trajs, dt, subsampling=1):
-    """
-    Convert a :code:`state_trajs.npy` or :code:`cv_trajs.npy` file to :math:`N_{\text{rep}}` XVG files
-    that have two columns: time (ps) and state index.
-
-    Parameters
-    ----------
-    trajs : ndarray
-        The state-space or CV-space trajectories read from :code:`state_trajs.npy` or :code:`cv_trajs.npy`.
-    dt : float
-        The time interval (in ps) between consecutive frames of the trajectories.
-    subsampling : int
-        The stride for subsampling the time series. The default is 1.
-    """
-    n_configs = len(trajs)
-    for i in range(n_configs):
-        traj = trajs[i]
-        t = np.arange(len(traj)) * dt
-        headers = ['This file was created by ensemble_md']
-        if 'int' in str(traj.dtype):
-            headers.extend(['Time (ps) v.s. State index'])
-            np.savetxt(f'traj_{i}.xvg', np.transpose([t[::subsampling], traj[::subsampling]]), header='\n'.join(headers), fmt=['%-8.1f', '%4.0f'])  # noqa: E501
-        else:
-            headers.extend(['Time (ps) v.s. CV'])
-            np.savetxt(f'traj_{i}.xvg', np.transpose([t[::subsampling], traj[::subsampling]]), header='\n'.join(headers), fmt=['%-8.1f', '%8.6f'])  # noqa: E501
-
-
 def stitch_time_series_for_sim(files, shifts=None, dhdl=True, col_idx=-1, save=True):
     """
     Stitches the state-space/CV-space time series in the same replica/simulation folder.
     That is, the output time series is contributed by multiple different trajectories (initiated by
     different starting configurations) to a certain alchemical range.
-
+    
     Parameters
     ----------
     files : list
@@ -191,14 +164,21 @@ def stitch_time_series_for_sim(files, shifts=None, dhdl=True, col_idx=-1, save=T
     for i in range(n_sim):
         for j in range(n_iter):
             if dhdl:
-                traj, _ = extract_state_traj(files[i][j])
+                traj, t = extract_state_traj(files[i][j])
             else:
                 traj = np.loadtxt(files[i][j], comments=['#', '@'])[:, col_idx]
+                t = np.loadtxt(files[i][j], comments=['#', '@'])[:, 0]
 
             if dhdl:
                 traj = list(np.array(traj) + shifts[i])
 
             if j != 0:
+                # Check the continuity of the trajectory
+                if traj[0] != trajs[i][-1] or t[0] != trajs[i][-1]:
+                    err_str = f'The first frame of iteration {j} in replica {i} is not continuous with the last frame of the previous iteration.'  # noqa: E501
+                    err_str += f'Please check files {files[i][j - 1]} and {files[i][j]}.'
+                    raise ValueError(err_str)
+                
                 traj = traj[:-1]  # remove the last frame, which is the same as the first of the next time series.
             trajs[i].extend(traj)
 
@@ -247,6 +227,33 @@ def stitch_trajs(gmx_executable, files, rep_trajs):
             print(f'Error with return code: {returncode}):\n{stderr}')
 
 
+def convert_npy2xvg(trajs, dt, subsampling=1):
+    """
+    Convert a :code:`state_trajs.npy` or :code:`cv_trajs.npy` file to :math:`N_{\text{rep}}` XVG files
+    that have two columns: time (ps) and state index.
+
+    Parameters
+    ----------
+    trajs : ndarray
+        The state-space or CV-space trajectories read from :code:`state_trajs.npy` or :code:`cv_trajs.npy`.
+    dt : float
+        The time interval (in ps) between consecutive frames of the trajectories.
+    subsampling : int
+        The stride for subsampling the time series. The default is 1.
+    """
+    n_configs = len(trajs)
+    for i in range(n_configs):
+        traj = trajs[i]
+        t = np.arange(len(traj)) * dt
+        headers = ['This file was created by ensemble_md']
+        if 'int' in str(traj.dtype):
+            headers.extend(['Time (ps) v.s. State index'])
+            np.savetxt(f'traj_{i}.xvg', np.transpose([t[::subsampling], traj[::subsampling]]), header='\n'.join(headers), fmt=['%-8.1f', '%4.0f'])  # noqa: E501
+        else:
+            headers.extend(['Time (ps) v.s. CV'])
+            np.savetxt(f'traj_{i}.xvg', np.transpose([t[::subsampling], traj[::subsampling]]), header='\n'.join(headers), fmt=['%-8.1f', '%8.6f'])  # noqa: E501
+
+
 def traj2transmtx(traj, N, normalize=True):
     """
     Computes the transition matrix given a trajectory. For example, if a state-space
@@ -336,7 +343,7 @@ def plot_rep_trajs(trajs, fig_name, dt=None, stride=None):
     plt.savefig(f'{fig_name}', dpi=600)
 
 
-def plot_state_trajs(trajs, state_ranges, fig_name, dt=None, stride=1, title_prefix='Trajectory'):
+def plot_state_trajs(trajs, state_ranges, fig_name, dt=None, stride=None, title_prefix='Trajectory'):
     """
     Plots the time series of state index.
 
@@ -405,7 +412,6 @@ def plot_state_trajs(trajs, state_ranges, fig_name, dt=None, stride=1, title_pre
             linewidth = 1  # this is the default
 
         # Finally, plot the trajectories
-        linewidth = 1  # this is the default
         plt.plot(x[::stride], trajs[i][::stride], color=colors[i], linewidth=linewidth)
         if dt is None:
             plt.xlabel('MC moves')

From b399004496082374a497998d359e7469c47448e5 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Fri, 29 Mar 2024 00:17:43 +0800
Subject: [PATCH 21/41] Fixed a minor bug in stitch_time_series_for_sim

---
 ensemble_md/analysis/analyze_traj.py | 9 +++------
 ensemble_md/cli/analyze_REXEE.py     | 3 +--
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index d7ce436a..1af2df27 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -124,7 +124,7 @@ def stitch_time_series(files, rep_trajs, shifts=None, dhdl=True, col_idx=-1, sav
     return trajs
 
 
-def stitch_time_series_for_sim(files, shifts=None, dhdl=True, col_idx=-1, save=True):
+def stitch_time_series_for_sim(files, dhdl=True, col_idx=-1, save=True):
     """
     Stitches the state-space/CV-space time series in the same replica/simulation folder.
     That is, the output time series is contributed by multiple different trajectories (initiated by
@@ -136,9 +136,6 @@ def stitch_time_series_for_sim(files, shifts=None, dhdl=True, col_idx=-1, save=T
         A list of lists of file names of GROMACS DHDL files or general GROMACS XVG files
         or PLUMED output files. Specifically, :code:`files[i]` should be a list containing
         the files of interest from all iterations in replica :code:`i`. The files should be sorted naturally.
-    shifts : list
-        A list of values for shifting the state indices for each replica. The length of the list
-        should be equal to the number of replicas. This is only needed when :code:`dhdl=True`.
     dhdl : bool
         Whether the input files are GROMACS dhdl files, in which case trajectories of global alchemical indices
         will be generated. If :code:`dhdl=False`, the input files must be readable by `numpy.loadtxt` assuming that
@@ -169,8 +166,8 @@ def stitch_time_series_for_sim(files, shifts=None, dhdl=True, col_idx=-1, save=T
                 traj = np.loadtxt(files[i][j], comments=['#', '@'])[:, col_idx]
                 t = np.loadtxt(files[i][j], comments=['#', '@'])[:, 0]
 
-            if dhdl:
-                traj = list(np.array(traj) + shifts[i])
+            # Note that there is no need to shift the indices for the same replica, which uses the same set of states
+            # traj = list(np.array(traj) + shifts[i])
 
             if j != 0:
                 # Check the continuity of the trajectory
diff --git a/ensemble_md/cli/analyze_REXEE.py b/ensemble_md/cli/analyze_REXEE.py
index 9b8f8149..7dcdead6 100644
--- a/ensemble_md/cli/analyze_REXEE.py
+++ b/ensemble_md/cli/analyze_REXEE.py
@@ -186,8 +186,7 @@ def main():
         # This may take a while.
         print('2-4. Stitching time series of state index for each alchemical range ...')
         dhdl_files = [natsort.natsorted(glob.glob(f'sim_{i}/iteration_*/*dhdl*xvg')) for i in range(REXEE.n_sim)]
-        shifts = np.arange(REXEE.n_sim) * REXEE.s
-        state_trajs_for_sim = analyze_traj.stitch_time_series_for_sim(dhdl_files, shifts)
+        state_trajs_for_sim = analyze_traj.stitch_time_series_for_sim(dhdl_files)
 
     # 2-5. Plot the time series of state index for different alchemical ranges
     print('\n2-5. Plotting the time series of state index for different alchemical ranges ...')

From 8f88afa6ad6b5e87a73a695ead0bc9712e67e3e4 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Fri, 29 Mar 2024 00:33:20 +0800
Subject: [PATCH 22/41] Minor tweaks

---
 ensemble_md/analysis/analyze_traj.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index 1af2df27..66ceb50b 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -158,6 +158,7 @@ def stitch_time_series_for_sim(files, dhdl=True, col_idx=-1, save=True):
     n_sim = len(files)      # number of replicas
     n_iter = len(files[0])  # number of iterations per replica
     trajs = [[] for i in range(n_sim)]
+    t_last = None    # just for checking the continuity of the trajectory
     for i in range(n_sim):
         for j in range(n_iter):
             if dhdl:
@@ -171,13 +172,14 @@ def stitch_time_series_for_sim(files, dhdl=True, col_idx=-1, save=True):
 
             if j != 0:
                 # Check the continuity of the trajectory
-                if traj[0] != trajs[i][-1] or t[0] != trajs[i][-1]:
+                if traj[0] != trajs[i][-1] or t[0] != t_last:
                     err_str = f'The first frame of iteration {j} in replica {i} is not continuous with the last frame of the previous iteration.'  # noqa: E501
                     err_str += f'Please check files {files[i][j - 1]} and {files[i][j]}.'
                     raise ValueError(err_str)
                 
                 traj = traj[:-1]  # remove the last frame, which is the same as the first of the next time series.
             trajs[i].extend(traj)
+            t_last = t[-1]
 
     # Save the trajectories as an NPY file if desired
     if save is True:

From bac786e67f806ed8c10214dfe2bf1c3361f7a6b4 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Fri, 29 Mar 2024 01:06:21 +0800
Subject: [PATCH 23/41] Tweaked stitch_time_series

---
 ensemble_md/analysis/analyze_traj.py | 52 +++++++++++++++++-----------
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index 66ceb50b..3f38ed33 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -93,26 +93,32 @@ def stitch_time_series(files, rep_trajs, shifts=None, dhdl=True, col_idx=-1, sav
 
     # Then, stitch the trajectories for each starting configuration
     trajs = [[] for i in range(n_configs)]  # for each starting configuration
+    t_last, val_last = None, None    # just for checking the continuity of the trajectory
     for i in range(n_configs):
         for j in range(n_iter):
-            if j == 0:
-                if dhdl:
-                    traj, t = extract_state_traj(files_sorted[i][j])
-                else:
-                    traj = np.loadtxt(files_sorted[i][j], comments=['#', '@'])[:, col_idx]
-                    t = np.loadtxt(files_sorted[i][j], comments=['#', '@'])[:, 0]  # only used if save_xvg is True
+            if dhdl:
+                traj, t = extract_state_traj(files_sorted[i][j])
             else:
-                # Starting from the 2nd iteration, we get rid of the first time frame the first
-                # frame of iteration n+1 is the same as the last frame of iteration n
-                if dhdl:
-                    traj, t = extract_state_traj(files_sorted[i][j])
-                    traj, t = traj[1:], t[1:]
-                else:
-                    traj = np.loadtxt(files_sorted[i][j], comments=['#', '@'])[:, col_idx][1:]
+                traj = np.loadtxt(files_sorted[i][j], comments=['#', '@'])[:, col_idx]
+                t = np.loadtxt(files_sorted[i][j], comments=['#', '@'])[:, 0]
+
+            # Shift the indices so that global indices are used.
+            shift_idx = rep_trajs[i][j]
+            traj = list(np.array(traj) + shifts[shift_idx])
+
+            if j != 0:
+                # Check the continuity of the trajectory
+                if traj[0] != val_last or t[0] != t_last:
+                    err_str = f'The first frame of iteration {j} of starting configuration {i} is not continuous with the last frame of the previous iteration. '
+                    err_str += f'Please check files {files_sorted[i][j - 1]} and {files_sorted[i][j]}.'
+                    raise ValueError(err_str)
 
-            if dhdl:  # Trajectories of global alchemical indices will be generated.
-                shift_idx = rep_trajs[i][j]
-                traj = list(np.array(traj) + shifts[shift_idx])
+            t_last = t[-1]
+            val_last = traj[-1]
+
+            if j != 0:
+                traj = traj[:-1]  # remove the last frame, which is the same as the first of the next time series.
+            
             trajs[i].extend(traj)
 
     if save_npy is True:
@@ -158,7 +164,7 @@ def stitch_time_series_for_sim(files, dhdl=True, col_idx=-1, save=True):
     n_sim = len(files)      # number of replicas
     n_iter = len(files[0])  # number of iterations per replica
     trajs = [[] for i in range(n_sim)]
-    t_last = None    # just for checking the continuity of the trajectory
+    t_last, val_last = None, None    # just for checking the continuity of the trajectory
     for i in range(n_sim):
         for j in range(n_iter):
             if dhdl:
@@ -172,14 +178,18 @@ def stitch_time_series_for_sim(files, dhdl=True, col_idx=-1, save=True):
 
             if j != 0:
                 # Check the continuity of the trajectory
-                if traj[0] != trajs[i][-1] or t[0] != t_last:
-                    err_str = f'The first frame of iteration {j} in replica {i} is not continuous with the last frame of the previous iteration.'  # noqa: E501
+                if traj[0] != val_last or t[0] != t_last:
+                    err_str = f'The first frame of iteration {j} in replica {i} is not continuous with the last frame of the previous iteration. '  # noqa: E501
                     err_str += f'Please check files {files[i][j - 1]} and {files[i][j]}.'
                     raise ValueError(err_str)
-                
+
+            t_last = t[-1]
+            val_last = traj[-1]
+
+            if j != 0:
                 traj = traj[:-1]  # remove the last frame, which is the same as the first of the next time series.
+            
             trajs[i].extend(traj)
-            t_last = t[-1]
 
     # Save the trajectories as an NPY file if desired
     if save is True:

From fa205ac9dde1b304c8585fb039043e1af3da1b54 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Fri, 29 Mar 2024 01:18:26 +0800
Subject: [PATCH 24/41] Added a test for plot_state_trajs and developed
 save_and_exclude; Some other minor tweaks

---
 ensemble_md/analysis/analyze_traj.py   |   8 +-
 ensemble_md/tests/test_analyze_traj.py | 133 ++++++++++++++++---------
 2 files changed, 92 insertions(+), 49 deletions(-)

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index 3f38ed33..8e4b66f8 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -109,7 +109,7 @@ def stitch_time_series(files, rep_trajs, shifts=None, dhdl=True, col_idx=-1, sav
             if j != 0:
                 # Check the continuity of the trajectory
                 if traj[0] != val_last or t[0] != t_last:
-                    err_str = f'The first frame of iteration {j} of starting configuration {i} is not continuous with the last frame of the previous iteration. '
+                    err_str = f'The first frame of iteration {j} of starting configuration {i} is not continuous with the last frame of the previous iteration. '  # noqa: E501
                     err_str += f'Please check files {files_sorted[i][j - 1]} and {files_sorted[i][j]}.'
                     raise ValueError(err_str)
 
@@ -118,7 +118,7 @@ def stitch_time_series(files, rep_trajs, shifts=None, dhdl=True, col_idx=-1, sav
 
             if j != 0:
                 traj = traj[:-1]  # remove the last frame, which is the same as the first of the next time series.
-            
+
             trajs[i].extend(traj)
 
     if save_npy is True:
@@ -135,7 +135,7 @@ def stitch_time_series_for_sim(files, dhdl=True, col_idx=-1, save=True):
     Stitches the state-space/CV-space time series in the same replica/simulation folder.
     That is, the output time series is contributed by multiple different trajectories (initiated by
     different starting configurations) to a certain alchemical range.
-    
+
     Parameters
     ----------
     files : list
@@ -188,7 +188,7 @@ def stitch_time_series_for_sim(files, dhdl=True, col_idx=-1, save=True):
 
             if j != 0:
                 traj = traj[:-1]  # remove the last frame, which is the same as the first of the next time series.
-            
+
             trajs[i].extend(traj)
 
     # Save the trajectories as an NPY file if desired
diff --git a/ensemble_md/tests/test_analyze_traj.py b/ensemble_md/tests/test_analyze_traj.py
index f35a3a65..9947e930 100644
--- a/ensemble_md/tests/test_analyze_traj.py
+++ b/ensemble_md/tests/test_analyze_traj.py
@@ -11,6 +11,7 @@
 Unit tests for the module analyze_traj.py.
 """
 import os
+import shutil
 import numpy as np
 from unittest.mock import patch, MagicMock
 from ensemble_md.analysis import analyze_traj
@@ -19,30 +20,28 @@
 input_path = os.path.join(current_path, "data")
 
 
-def assert_plt_calls(mock_plt, call_specs):
+def save_and_exclude(f_input, n_exclude, f_output=None):
     """
-    Assert calls to matplotlib functions with specified parameters.
+    Saves a given file as another file while excluding the last :code:`n_exclude` lines.
 
     Parameters
     ----------
-    mock_plt : :code:`MagicMock` object
-        Mock object of :code:`matplotlib.pyplot`.
-    call_specs : list
-        A list of lists that contains the following four elements:
-          - The name of the matplotlib function (as :code:`str`) that was called.
-          - The assert method (as :code:`str`), e.g., :code:`assert_called_once_with`.
-          - The positional arguments (as :code:`tuple`) passed to the matplotlib function.
-          - The keyword arguments (as :code:`dict`) passed to the matplotlib function.
+    f_input : str
+        The input file.
+    n_exclude : int
+        Number of trailing lines to exclude.
+    f_output : str, optional
+        The output file. If None, "_short" is inserted before the extension of the
+        input file name (e.g., "dhdl.xvg" becomes "dhdl_short.xvg").
     """
-    for call_spec in call_specs:
-        plt_func = call_spec[0]
-        assert_method = call_spec[1]
-        plt_args = call_spec[2]
-        plt_kwargs = call_spec[3]
+    if f_output is None:
+        f_output = '{}_short{}'.format(*os.path.splitext(f_input))  # safe for paths with extra dots
 
-        mock_func = getattr(mock_plt, plt_func)
-        assert_func = getattr(mock_func, assert_method)
-        assert_func(*plt_args, **plt_kwargs)  # call the assertion method
+    with open(f_input, 'r') as f:
+        lines = f.readlines()[:-n_exclude or None]  # "or None": n_exclude=0 keeps every line
+
+    with open(f_output, 'w') as f:
+        f.writelines(lines)
 
 
 def test_extract_state_traj():
@@ -94,7 +93,22 @@ def test_convert_npy2xvg():
 
 
 def test_stitch_time_series_for_sim():
-    pass
+    # Set up files for testing
+    for sim in range(2):
+        for iteration in range(2):
+            target_dir = f'ensemble_md/tests/data/stitch_test/sim_{sim}/iteration_{iteration}'
+            os.makedirs(target_dir)
+            shutil.copy(f'ensemble_md/tests/data/dhdl/dhdl_{sim * 2 + iteration}.xvg', f'{target_dir}/dhdl.xvg')
+            save_and_exclude(f'{target_dir}/dhdl.xvg', 40)  # just keep the first 10 frames
+
+    # files = [[f'ensemble_md/tests/data/stitch_test/sim_{i}/iteration_{j}/dhdl_short.xvg' for j in range(2)] for i in range(2)]  # noqa: E501
+    # shifts = [1, 1]
+
+    # More to come ...
+    # trajs_test = analyze_traj.stitch_time_series_for_sim(files, shifts, save=True)
+    # trajs_expected = [
+    #     [0, 0, 3, 1, 4, 4, 5, 4, 5, 5, 4]
+    # ]
 
 
 def test_stitch_trajs():
@@ -141,28 +155,13 @@ def test_plot_rep_trajs(mock_plt):
     y_input_3 = np.array([2, 0, 1, 0, 2])
 
     # Verify that the expected matplotlib functions were called
-    # mock_plt.figure.assert_called_once()
-    # mock_plt.plot.assert_called()
-    # mock_plt.xlabel.assert_called_with('MC moves')
-    # mock_plt.ylabel.assert_called_with('Replica')
-    # mock_plt.grid.assert_called_once()
-    # mock_plt.legend.assert_called_once()
-    # mock_plt.savefig.assert_called_once_with(fig_name, dpi=600)
-    
-    # Using assert_plt_calls, the lines above can be written as below
-    call_specs = [
-        ['figure', 'assert_called_once', (), {}],
-        ['plot', 'assert_called', (), {}],
-        ['xlabel', 'assert_called_with', ('MC moves',), {}],
-        ['ylabel', 'assert_called_with', ('Replica',), {}],
-        ['grid', 'assert_called_once', (), {}],
-        ['legend', 'assert_called_once', (), {}],
-        ['savefig', 'assert_called_once_with', (fig_name,), {'dpi': 600}]
-    ]
-    assert_plt_calls(mock_plt, call_specs)
-    
-
-
+    mock_plt.figure.assert_called_once()
+    mock_plt.plot.assert_called()
+    mock_plt.xlabel.assert_called_with('MC moves')
+    mock_plt.ylabel.assert_called_with('Replica')
+    mock_plt.grid.assert_called_once()
+    mock_plt.legend.assert_called_once()
+    mock_plt.savefig.assert_called_once_with(fig_name, dpi=600)
     assert mock_plt.plot.call_count == len(trajs)
 
     # mock_plt.plot.assert_any_call(x_input, y_input_1, color=colors[0], label='Trajectory 0')
@@ -246,7 +245,7 @@ def test_plot_state_trajs(mock_plt):
     colors = [cmap(i) for i in np.arange(n_sim) / n_sim]
 
     # Mock the return value of plt.subplots to return a tuple of two mock objects
-    # We need this because plot_state_trajs calls _, ax = plt.subplots(...). When we mock 
+    # We need this because plot_state_trajs calls _, ax = plt.subplots(...). When we mock
     # matplolib.pyplot using mock_plt, plt.subplots will be replaced by mock_plt.subplots
     # and will return a mock object, not the tuple of figure and axes objects that the real plt.subplots returns.
     # This would in turn lead to an ValueError. To avoid this, we need to mock the return values of plt.subplots.
@@ -259,20 +258,64 @@ def test_plot_state_trajs(mock_plt):
 
     analyze_traj.plot_state_trajs(trajs, state_ranges, fig_name)
 
-    x_input = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
-    
+    x_input = np.arange(10)
+
     mock_plt.subplots.assert_called_once_with(nrows=1, ncols=2, figsize=(5, 2.5))
     mock_plt.subplot.assert_called()
     mock_plt.plot.assert_called()
     mock_plt.fill_between.assert_called()
     mock_plt.xlabel.assert_called_with('MC moves')
     mock_plt.ylabel.assert_called_with('State')
+    mock_plt.xlim.assert_called_with([0, 9])
+    mock_plt.ylim.assert_called_with([-0.2, 5.2])
     mock_plt.grid.assert_called()
+    mock_plt.tight_layout.assert_called_once()
+    mock_plt.savefig.assert_called_once_with(fig_name, dpi=600)
 
     assert mock_plt.subplot.call_count == len(state_ranges)
     assert mock_plt.plot.call_count == len(state_ranges)
-    assert mock_plt.grid.call_count == len(state_ranges)
     assert mock_plt.fill_between.call_count == len(state_ranges) ** 2
+    assert mock_plt.xlabel.call_count == len(state_ranges)
+    assert mock_plt.ylabel.call_count == len(state_ranges)
+    assert mock_plt.xlim.call_count == len(state_ranges)
+    assert mock_plt.ylim.call_count == len(state_ranges)
+    assert mock_plt.grid.call_count == len(state_ranges)
+
+    assert all(np.array_equal(a, b) for a, b in zip(mock_plt.plot.call_args_list[0][0], (x_input, trajs[0])))
+    assert all(np.array_equal(a, b) for a, b in zip(mock_plt.plot.call_args_list[1][0], (x_input, trajs[1])))
+    assert mock_plt.plot.call_args_list[0][1] == {'color': colors[0], 'linewidth': 1}
+    assert mock_plt.plot.call_args_list[1][1] == {'color': colors[1], 'linewidth': 1}
+    assert mock_plt.fill_between.call_args_list[0] == (([0, 9],), {'y1': 3, 'y2': -0.5, 'color': colors[0], 'alpha': 0.1})  # noqa: E501
+    assert mock_plt.fill_between.call_args_list[1] == (([0, 9],), {'y1': 5.5, 'y2': 2, 'color': colors[1], 'alpha': 0.1})  # noqa: E501
+
+    # Case 2: Short trajs with dt and stride
+    # Well here we will just test things different from Case 1.
+    mock_plt.reset_mock()
+    dt = 0.2  # ps
+    stride = 2
+    x_input = np.arange(10)[::stride] * dt
+    y_input_1 = np.array([0, 0, 3, 3, 5])
+    y_input_2 = np.array([2, 4, 4, 2, 0])
+
+    analyze_traj.plot_state_trajs(trajs, state_ranges, fig_name, dt, stride)
+
+    mock_plt.xlabel.assert_called_with('Time (ps)')
+    assert all(np.array_equal(a, b) for a, b in zip(mock_plt.plot.call_args_list[0][0], (x_input, y_input_1)))
+    assert all(np.array_equal(a, b) for a, b in zip(mock_plt.plot.call_args_list[1][0], (x_input, y_input_2)))
+
+    # Case 3: Long trajs with dt and without stride
+    print('case 3')
+    mock_plt.reset_mock()
+    trajs = np.random.randint(low=0, high=5, size=(2, 2000000))
+    analyze_traj.plot_state_trajs(trajs, state_ranges, fig_name, dt)
+
+    mock_plt.xlabel.assert_called_with('Time (ns)')
+    assert len(mock_plt.plot.call_args_list[0][0][0]) == 2000000 / 10
+    assert len(mock_plt.plot.call_args_list[0][0][1]) == 2000000 / 10
+    assert len(mock_plt.plot.call_args_list[1][0][0]) == 2000000 / 10
+    assert len(mock_plt.plot.call_args_list[1][0][1]) == 2000000 / 10
+    assert mock_plt.plot.call_args_list[0][1] == {'color': colors[0], 'linewidth': 0.01}
+    assert mock_plt.plot.call_args_list[1][1] == {'color': colors[1], 'linewidth': 0.01}
 
 
 def test_plot_state_hist():

From fcb8787902e80fbe8b2c9cd65a702ddf0336ed1c Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Sun, 31 Mar 2024 00:22:31 +0800
Subject: [PATCH 25/41] Added a test for plot_state_hist

---
 ensemble_md/analysis/analyze_traj.py   |   2 +
 ensemble_md/tests/test_analyze_traj.py | 121 ++++++++++++++++++++++++-
 2 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index 8e4b66f8..c2ce555c 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -485,6 +485,7 @@ def plot_state_hist(trajs, state_ranges, fig_name, stack=True, figsize=None, pre
     hist_data = []
     lower_bound, upper_bound = -0.5, n_states - 0.5
     for traj in trajs:
+        # bins for different traj in trajs should be the same
         hist, bins = np.histogram(traj, bins=np.arange(lower_bound, upper_bound + 1, 1))
         hist_data.append(hist)
     if save_hist is True:
@@ -510,6 +511,7 @@ def plot_state_hist(trajs, state_ranges, fig_name, stack=True, figsize=None, pre
         y_max = 0
         for i in range(n_configs):
             max_count = np.max(bottom + hist_data[i])
+            print(max_count)
             if max_count > y_max:
                 y_max = max_count
             plt.bar(
diff --git a/ensemble_md/tests/test_analyze_traj.py b/ensemble_md/tests/test_analyze_traj.py
index 9947e930..89f2aadb 100644
--- a/ensemble_md/tests/test_analyze_traj.py
+++ b/ensemble_md/tests/test_analyze_traj.py
@@ -110,6 +110,9 @@ def test_stitch_time_series_for_sim():
     #     [0, 0, 3, 1, 4, 4, 5, 4, 5, 5, 4]
     # ]
 
+    # Clean up
+    shutil.rmtree('ensemble_md/tests/data/stitch_test')
+
 
 def test_stitch_trajs():
     pass
@@ -318,8 +321,122 @@ def test_plot_state_trajs(mock_plt):
     assert mock_plt.plot.call_args_list[1][1] == {'color': colors[1], 'linewidth': 0.01}
 
 
-def test_plot_state_hist():
-    pass
+@patch('ensemble_md.analysis.analyze_traj.plt')
+def test_plot_state_hist(mock_plt):
+    fig_name = 'ensemble_md/tests/data/test.png'
+    state_ranges = [[0, 1, 2, 3], [2, 3, 4, 5]]
+    trajs = np.array([[0, 1, 0, 2, 3, 4, 3, 4, 5, 4], [2, 3, 4, 5, 4, 3, 2, 1, 0, 1]], dtype=int)
+    cmap = mock_plt.cm.ocean
+    mock_fig = MagicMock()
+    mock_plt.figure.return_value = mock_fig
+
+    n_configs = 2
+    colors = [cmap(i) for i in np.arange(n_configs) / n_configs]
+    hist_data = np.array([[2, 1, 1, 2, 3, 1], [1, 2, 2, 2, 2, 1]])
+
+    # Case 1: Default settings
+    analyze_traj.plot_state_hist(trajs, state_ranges, fig_name)
+
+    mock_plt.figure.assert_called_once_with(figsize=(6.4, 4.8))
+    mock_fig.add_subplot.assert_called_once_with(111)
+    mock_plt.xticks.assert_called_once_with(range(6))
+    mock_plt.xlim.assert_called_once_with([-0.5, 5.5])
+    mock_plt.ylim.assert_called_once_with([0, 5.25])  # y_max = (2 + 3) * 1.05
+    mock_plt.xlabel.assert_called_once_with('State index')
+    mock_plt.ylabel.assert_called_once_with('Count')
+    mock_plt.grid.assert_called_once()
+    mock_plt.legend.assert_called_once()
+    mock_plt.tight_layout.assert_called_once()
+    mock_plt.savefig.assert_called_once_with(fig_name, dpi=600)
+
+    assert mock_plt.bar.call_count == n_configs
+    assert mock_plt.fill_betweenx.call_count == n_configs
+    assert mock_plt.fill_betweenx.call_args_list[0] == (([0, 5.25],), {'x1': 3.5, 'x2': -1.0, 'color': colors[0], 'alpha': 0.1, 'zorder': 0})  # noqa: E501
+    assert mock_plt.fill_betweenx.call_args_list[1] == (([0, 5.25],), {'x1': 6.0, 'x2': 1.5, 'color': colors[1], 'alpha': 0.1, 'zorder': 0})  # noqa: E501
+    assert mock_plt.bar.call_args_list[0][0][0] == range(6)
+    np.testing.assert_array_equal(mock_plt.bar.call_args_list[0][0][1], hist_data[0])
+    assert mock_plt.bar.call_args_list[1][0][0] == range(6)
+    np.testing.assert_array_equal(mock_plt.bar.call_args_list[1][0][1], hist_data[1])
+    assert mock_plt.bar.call_args_list[0][1] == {
+        'align': 'center',
+        'width': 1,
+        'color': colors[0],
+        'edgecolor': 'black',
+        'label': 'Trajectory 0',
+        'alpha': 0.5,
+        'bottom': [0, 0, 0, 0, 0, 0]
+    }
+    assert mock_plt.bar.call_args_list[1][1] == {
+        'align': 'center',
+        'width': 1,
+        'color': colors[1],
+        'edgecolor': 'black',
+        'label': 'Trajectory 1',
+        'alpha': 0.5,
+        'bottom': [2, 1, 1, 2, 3, 1]
+    }
+
+    # Case 2: max(trajs[-1]) > 30, in which case we can just test the figsize
+    trajs_ = np.random.randint(low=29, high=50, size=(2, 200))
+    mock_plt.reset_mock()
+
+    analyze_traj.plot_state_hist(trajs_, state_ranges, fig_name)
+    mock_plt.figure.assert_called_once_with(figsize=(10, 4.8))
+
+    # Case 3: subplots=True
+    mock_plt.reset_mock()
+    mock_figure = MagicMock()
+    mock_axes = MagicMock()
+    mock_plt.subplots.return_value = (mock_figure, mock_axes)
+
+    analyze_traj.plot_state_hist(trajs, state_ranges, fig_name, subplots=True)
+
+    n_rows, n_cols = 1, 2
+    mock_plt.figure.assert_called_once_with(figsize=(6.4, 4.8))
+    mock_plt.subplots.assert_called_once_with(nrows=n_rows, ncols=n_cols, figsize=(8, 3))
+    mock_plt.xlabel.assert_called_with('State index')
+    mock_plt.ylabel.assert_called_with('Count')
+    mock_plt.tight_layout.assert_called_once()
+    mock_plt.savefig.assert_called_once_with(fig_name, dpi=600)
+
+    assert mock_plt.subplot.call_count == n_configs
+    assert mock_plt.subplot.call_args_list[0][0] == (n_rows, n_cols, 1)
+    assert mock_plt.subplot.call_args_list[1][0] == (n_rows, n_cols, 2)
+    assert mock_plt.bar.call_count == n_configs
+    assert mock_plt.xticks.call_count == n_configs
+    assert mock_plt.xlim.call_count == n_configs
+    assert mock_plt.xlabel.call_count == n_configs
+    assert mock_plt.ylabel.call_count == n_configs
+    assert mock_plt.title.call_count == n_configs
+    assert mock_plt.grid.call_count == n_configs
+
+    assert mock_plt.xticks.call_args_list[0][0] == ([0, 1, 2, 3],)
+    assert mock_plt.xticks.call_args_list[1][0] == ([2, 3, 4, 5],)
+    assert mock_plt.xticks.call_args_list[0][1] == {'fontsize': 8}
+    assert mock_plt.xticks.call_args_list[1][1] == {'fontsize': 8}
+    assert mock_plt.xlim.call_args_list[0][0] == ([-0.5, 3.5],)
+    assert mock_plt.xlim.call_args_list[1][0] == ([1.5, 5.5],)
+    assert mock_plt.title.call_args_list[0][0] == ('Trajectory 0',)
+    assert mock_plt.title.call_args_list[1][0] == ('Trajectory 1',)
+    assert mock_plt.bar.call_args_list[0][0][0] == [0, 1, 2, 3]
+    assert mock_plt.bar.call_args_list[1][0][0] == [2, 3, 4, 5]
+    np.testing.assert_array_equal(mock_plt.bar.call_args_list[0][0][1], hist_data[0][[0, 1, 2, 3]])
+    np.testing.assert_array_equal(mock_plt.bar.call_args_list[1][0][1], hist_data[1][[2, 3, 4, 5]])
+    assert mock_plt.bar.call_args_list[0][1] == {
+        'align': 'center',
+        'width': 1,
+        'edgecolor': 'black',
+        'alpha': 0.5,
+    }
+    assert mock_plt.bar.call_args_list[1][1] == {
+        'align': 'center',
+        'width': 1,
+        'edgecolor': 'black',
+        'alpha': 0.5,
+    }
+
+    # Clean up
+    os.remove('hist_data.npy')
 
 
 def test_calculate_hist_rmse():

From 4610175a4ce93e13b5dfef5845bae4c03c1e2577 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Sun, 31 Mar 2024 15:53:11 +0800
Subject: [PATCH 26/41] Added a test for calc_hist_rmse and plot_transit_time

---
 ensemble_md/analysis/analyze_traj.py   | 20 +++---
 ensemble_md/tests/test_analyze_traj.py | 89 ++++++++++++++++++++++++--
 2 files changed, 97 insertions(+), 12 deletions(-)

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index c2ce555c..3e448fd9 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -10,8 +10,10 @@
 """
 The :obj:`.analyze_traj` module provides methods for analyzing trajectories in REXEE.
 """
+import copy
 import numpy as np
 import matplotlib.pyplot as plt
+from itertools import chain
 from matplotlib.ticker import MaxNLocator
 
 from alchemlyb.parsing.gmx import _get_headers as get_headers
@@ -511,7 +513,6 @@ def plot_state_hist(trajs, state_ranges, fig_name, stack=True, figsize=None, pre
         y_max = 0
         for i in range(n_configs):
             max_count = np.max(bottom + hist_data[i])
-            print(max_count)
             if max_count > y_max:
                 y_max = max_count
             plt.bar(
@@ -678,12 +679,13 @@ def plot_transit_time(trajs, N, fig_prefix=None, dt=None, folder='.'):
                 last_visited = k
 
         # Here we figure out the round-trip time from t_0k and t_k0.
-        if len(t_0k) != len(t_k0):   # then it must be len(t_0k) = len(t_k0) + 1 or len(t_k0) = len(t_0k) + 1, so we drop the last element of the larger list  # noqa: E501
-            if len(t_0k) > len(t_k0):
-                t_0k.pop()
+        t_0k_, t_k0_ = copy.deepcopy(t_0k), copy.deepcopy(t_k0)
+        if len(t_0k_) != len(t_k0_):   # then it must be len(t_0k) = len(t_k0) + 1 or len(t_k0) = len(t_0k) + 1, so we drop the last element of the larger list  # noqa: E501
+            if len(t_0k_) > len(t_k0_):
+                t_0k_.pop()
             else:
-                t_k0.pop()
-        t_roundtrip = list(np.array(t_0k) + np.array(t_k0))
+                t_k0_.pop()
+        t_roundtrip = list(np.array(t_0k_) + np.array(t_k0_))
 
         if end_0_found is True and end_k_found is True:
             if dt is not None:
@@ -711,7 +713,8 @@ def plot_transit_time(trajs, N, fig_prefix=None, dt=None, folder='.'):
             t_roundtrip_avg.append(np.mean(t_roundtrip))
 
             if len(t_0k) + len(t_k0) + len(t_roundtrip) > 0:  # i.e. not all are empty
-                if sci is False and np.max([t_0k, t_k0, t_roundtrip]) >= 10000:
+                flattened_list = list(chain.from_iterable([t_0k, t_k0, t_roundtrip]))
+                if sci is False and np.max(flattened_list) >= 10000:
                     sci = True
         else:
             t_0k_list.append([])
@@ -742,7 +745,8 @@ def plot_transit_time(trajs, N, fig_prefix=None, dt=None, folder='.'):
             for i in range(len(t_list)):    # t_list[i] is the list for trajectory i
                 plt.plot(np.arange(len(t_list[i])) + 1, t_list[i], label=f'Trajectory {i}', marker=marker)
 
-            if max(max((t_list))) >= 10000:
+            flattened_t_list = list(chain.from_iterable(t_list))
+            if np.max(flattened_t_list) >= 10000:
                 plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
             plt.xlabel('Event index')
             plt.ylabel(f'{y_labels[t]}')
diff --git a/ensemble_md/tests/test_analyze_traj.py b/ensemble_md/tests/test_analyze_traj.py
index 89f2aadb..5f615178 100644
--- a/ensemble_md/tests/test_analyze_traj.py
+++ b/ensemble_md/tests/test_analyze_traj.py
@@ -439,12 +439,93 @@ def test_plot_state_hist(mock_plt):
     os.remove('hist_data.npy')
 
 
-def test_calculate_hist_rmse():
-    pass
+def test_calc_hist_rmse():
+    # Case 1: Exactly flat histogram with some states accessible by 2 replicas
+    hist_data = [[15, 15, 30, 30, 15, 15], [15, 15, 30, 30, 15, 15]]
+    state_ranges = [[0, 1, 2, 3], [2, 3, 4, 5]]
+    assert analyze_traj.calc_hist_rmse(hist_data, state_ranges) == 0
 
+    # Case 2: Exactly flat histogram with some states accessible by 3 replicas
+    hist_data = [[10, 20, 30, 30, 20, 10], [10, 20, 30, 30, 20, 10]]
+    state_ranges = [[0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5]]
+    assert analyze_traj.calc_hist_rmse(hist_data, state_ranges) == 0
 
-def plot_transit_time():
-    pass
+
+@patch('ensemble_md.analysis.analyze_traj.plt')
+def test_plot_transit_time(mock_plt):
+    N = 4
+    trajs = [
+        [0, 1, 2, 0, 2, 3, 2, 2, 1, 0, 1, 1, 2, 0, 1, 2, 3, 2, 1, 0],
+        [1, 2, 1, 0, 1, 2, 2, 3, 2, 3, 3, 2, 1, 0, 1, 2, 2, 3, 2, 1]
+    ]
+
+    # Case 1: Default settings
+    t_1, t_2, t_3, u = analyze_traj.plot_transit_time(trajs, N)
+    assert t_1 == [[5, 7], [4, 4]]
+    assert t_2 == [[4, 3], [6]]
+    assert t_3 == [[9, 10], [10]]
+    assert u == 'step'
+
+    mock_plt.figure.assert_called()
+    mock_plt.plot.assert_called()
+    mock_plt.xlabel.assert_called_with('Event index')
+    mock_plt.ylabel.assert_called()
+
+    assert mock_plt.figure.call_count == 3
+    assert mock_plt.plot.call_count == 6
+    assert mock_plt.xlabel.call_count == 3
+    assert mock_plt.ylabel.call_count == 3
+    assert mock_plt.grid.call_count == 3
+    assert mock_plt.legend.call_count == 3
+
+    np.testing.assert_array_equal(mock_plt.plot.call_args_list[0][0], [[1, 2], [5, 7]])
+    np.testing.assert_array_equal(mock_plt.plot.call_args_list[1][0], [[1, 2], [4, 4]])
+    np.testing.assert_array_equal(mock_plt.plot.call_args_list[2][0], [[1, 2], [4, 3]])
+    np.testing.assert_array_equal(mock_plt.plot.call_args_list[3][0], [[1], [6]])
+    np.testing.assert_array_equal(mock_plt.plot.call_args_list[4][0], [[1, 2], [9, 10]])
+    np.testing.assert_array_equal(mock_plt.plot.call_args_list[5][0], [[1], [10]])
+    assert mock_plt.plot.call_args_list[0][1] == {'label': 'Trajectory 0', 'marker': 'o'}
+    assert mock_plt.plot.call_args_list[1][1] == {'label': 'Trajectory 1', 'marker': 'o'}
+    assert mock_plt.plot.call_args_list[2][1] == {'label': 'Trajectory 0', 'marker': 'o'}
+    assert mock_plt.plot.call_args_list[3][1] == {'label': 'Trajectory 1', 'marker': 'o'}
+    assert mock_plt.plot.call_args_list[4][1] == {'label': 'Trajectory 0', 'marker': 'o'}
+    assert mock_plt.plot.call_args_list[5][1] == {'label': 'Trajectory 1', 'marker': 'o'}
+    assert mock_plt.ylabel.call_args_list[0][0] == ('Average transit time from states 0 to k (step)',)
+    assert mock_plt.ylabel.call_args_list[1][0] == ('Average transit time from states k to 0 (step)',)
+    assert mock_plt.ylabel.call_args_list[2][0] == ('Average round-trip time (step)',)
+
+    # Case 2: dt = 0.2 ps (no fig_prefix is passed); here we just test the return values
+    mock_plt.reset_mock()
+    t_1, t_2, t_3, u = analyze_traj.plot_transit_time(trajs, N, dt=0.2)
+    t_1_, t_2_, t_3_ = [[1.0, 1.4], [0.8, 0.8]], [[0.8, 0.6], [1.2]], [[1.8, 2.0], [2.0]]
+    for i in range(2):
+        np.testing.assert_array_almost_equal(t_1[i], t_1_[i])
+        np.testing.assert_array_almost_equal(t_2[i], t_2_[i])
+        np.testing.assert_array_almost_equal(t_3[i], t_3_[i])
+    assert u == 'ps'
+
+    # Case 3: dt = 200 ps, long trajs
+    mock_plt.reset_mock()
+    trajs = np.ones((2, 2000000), dtype=int)
+    trajs[0][0], trajs[0][1000000], trajs[0][1999999] = 0, 3, 0
+    t_1, t_2, t_3, u = analyze_traj.plot_transit_time(trajs, N, dt=200)
+    assert t_1 == [[200000.0], []]
+    assert t_2 == [[199999.8], []]
+    assert t_3 == [[399999.8], []]
+    assert u == 'ns'
+    mock_plt.ticklabel_format.assert_called_with(style='sci', axis='y', scilimits=(0, 0))
+    assert mock_plt.ticklabel_format.call_count == 3
+
+    # Case 4: Poor sampling
+    mock_plt.reset_mock()
+    trajs = [[0, 1, 0, 1, 0], [1, 0, 1, 0, 1]]
+    t_1, t_2, t_3, u = analyze_traj.plot_transit_time(trajs, N)
+    assert t_1 == [[], []]
+    assert t_2 == [[], []]
+    assert t_3 == [[], []]
+    assert u == 'step'
+    mock_plt.figure.assert_not_called()
+    mock_plt.savefig.assert_not_called()
 
 
 def test_plot_g_vecs():

From 460f3e826bb7d425e34de38349cfdf942fd9e347 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Wed, 3 Apr 2024 14:27:14 +0800
Subject: [PATCH 27/41] Some intermediate work for test_plot_g_vecs

---
 ensemble_md/analysis/analyze_traj.py   |  6 ++---
 ensemble_md/tests/test_analyze_traj.py | 34 ++++++++++++++++++++++++--
 2 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index 3e448fd9..9c7de5c1 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -694,7 +694,8 @@ def plot_transit_time(trajs, N, fig_prefix=None, dt=None, folder='.'):
                 t_k0 = list(np.array(t_k0) * dt)  # units: ps
                 t_roundtrip = list(np.array(t_roundtrip) * dt)  # units: ps
                 if len(t_0k) + len(t_k0) + len(t_roundtrip) > 0:  # i.e. not all are empty
-                    if np.max([t_0k, t_k0, t_roundtrip]) > t_max:
+                    
+                    if np.max(list(chain.from_iterable([t_0k, t_k0, t_roundtrip]))) > t_max:
                         t_max = np.max([t_0k, t_k0, t_roundtrip])
 
                     if t_max >= 10000:
@@ -745,8 +746,7 @@ def plot_transit_time(trajs, N, fig_prefix=None, dt=None, folder='.'):
             for i in range(len(t_list)):    # t_list[i] is the list for trajectory i
                 plt.plot(np.arange(len(t_list[i])) + 1, t_list[i], label=f'Trajectory {i}', marker=marker)
 
-            flattened_t_list = list(chain.from_iterable(t_list))
-            if np.max(flattened_t_list) >= 10000:
+            if np.max(list(chain.from_iterable(t_list))) >= 10000:
                 plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
             plt.xlabel('Event index')
             plt.ylabel(f'{y_labels[t]}')
diff --git a/ensemble_md/tests/test_analyze_traj.py b/ensemble_md/tests/test_analyze_traj.py
index 5f615178..cd3f29dc 100644
--- a/ensemble_md/tests/test_analyze_traj.py
+++ b/ensemble_md/tests/test_analyze_traj.py
@@ -528,8 +528,38 @@ def test_plot_transit_time(mock_plt):
     mock_plt.savefig.assert_not_called()
 
 
-def test_plot_g_vecs():
-    pass
+@patch('ensemble_md.analysis.analyze_traj.plt')
+def test_plot_g_vecs(mock_plt):
+    cmap = mock_plt.cm.ocean
+    mock_ax = MagicMock()
+    mock_plt.gca.return_value = mock_ax
+    
+    # Case 1: Short g_vecs with refs and with plot_rmse = True
+    g_vecs = np.array([[0, 10, 20, 30], [0, 8, 18, 28]])
+    refs = np.array([0, 8, 18, 28])
+    refs_err = np.array([0.1, 0.1, 0.1, 0.1])
+
+    analyze_traj.plot_g_vecs(g_vecs, refs, refs_err, plot_rmse=True)
+
+    mock_plt.figure.assert_called()
+    mock_plt.plot.assert_called()
+    mock_plt.xlabel.assert_called_with('Iteration index')
+    # mock_plt.ylabel.assert_called_any('Alchemical weight (kT)')
+    mock_plt.xlim.assert_called()
+    mock_plt.grid.assert_called()
+    mock_plt.legend.assert_called_with(loc='center left', bbox_to_anchor=(1, 0.2))
+
+    assert mock_plt.figure.call_count == 2
+    assert mock_plt.plot.call_count == 4
+    assert mock_plt.axhline.call_count == 3
+    assert mock_plt.fill_between.call_count == 3
+    assert mock_plt.grid.call_count == 2
+
+    assert mock_plt.ylabel.call_args_list[0][0] == ('Alchemical weight (kT)',)
+    assert mock_plt.ylabel.call_args_list[1][0] == ('RMSE in the alchemical weights (kT)',)
+
+
+    # Case 2: Long g_vecs
 
 
 def test_get_swaps():

From e549178cd5423559d633eb16e4ab891f0ce06104 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Wed, 3 Apr 2024 17:06:13 +0800
Subject: [PATCH 28/41] Modified calc_spectral_gap to allow uncertainty
 estimation; Added synthesize_transmtx

---
 ensemble_md/analysis/analyze_matrix.py | 69 +++++++++++++++++++++++---
 ensemble_md/analysis/analyze_traj.py   |  2 +-
 ensemble_md/tests/test_analyze_traj.py |  5 +-
 3 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/ensemble_md/analysis/analyze_matrix.py b/ensemble_md/analysis/analyze_matrix.py
index 82d16d8f..c3bf665d 100644
--- a/ensemble_md/analysis/analyze_matrix.py
+++ b/ensemble_md/analysis/analyze_matrix.py
@@ -16,6 +16,7 @@
 from matplotlib import cm
 from ensemble_md.utils.exceptions import ParseError
 from ensemble_md.utils.exceptions import ParameterError
+from ensemble_md.analysis import analyze_traj
 
 
 def calc_transmtx(log_file, expanded_ensemble=True):
@@ -122,23 +123,59 @@ def calc_equil_prob(trans_mtx):
     return equil_prob
 
 
-def calc_spectral_gap(trans_mtx, atol=1e-8):
+def synthesize_transmtx(trans_mtx, n_frames=100000):
     """
-    Calculates the spectral gap of the input transition matrix.
+    Synthesizes a mock transition matrix by calculating the underlying equilibrium probability
+    of the input transition matrix, synthesizing a trajectory by drawing samples from the equilibrium
+    distribution, and calculating the transition matrix from the trajectory.
+
+    Parameters
+    ----------
+    trans_mtx: np.ndarray
+        The input transition matrix.
+    n_frames: int
+        The number of frames of the synthesized trajectory from which the mock transition matrix is calculated.
+
+    Returns
+    -------
+    syn_mtx: np.ndarray
+        The synthesized transition matrix.
+    syn_traj: np.ndarray
+        The synthesized trajectory.
+    diff_mtx: np.ndarray
+        The absolute difference between the input and synthesized transition matrices.
+    """
+    equil_prob = calc_equil_prob(trans_mtx)
+    n_states = len(equil_prob)
+    syn_traj = np.random.choice(n_states, size=n_frames, p=equil_prob)
+    syn_mtx = analyze_traj.traj_to_transmtx(syn_traj, n_states)
+    diff_mtx = np.abs(trans_mtx - syn_mtx)
+
+    return syn_mtx, syn_traj, diff_mtx
+
+
+def calc_spectral_gap(trans_mtx, atol=1e-8, n_bootstrap=50):
+    """
+    Calculates the spectral gap of the input transition matrix and estimates its
+    uncertainty using the bootstrap method.
 
     Parameters
     ----------
     trans_mtx : np.ndarray
-        The input state transition matrix
+        The input transition matrix
     atol: float
         The absolute tolerance for checking the sum of columns and rows.
+    n_bootstrap: int
+        The number of bootstrap iterations for uncertainty estimation.
 
     Returns
     -------
     spectral_gap : float
-        The spectral gap of the input transitio n matrix
+        The spectral gap of the input transition matrix.
+    spectral_gap_err : float
+        The estimated uncertainty of the spectral gap.
     eig_vals : list
-        The list of eigenvalues
+        The list of eigenvalues.
     """
     check_row = sum([np.isclose(np.sum(trans_mtx[i]), 1, atol=atol) for i in range(len(trans_mtx))])
     check_col = sum([np.isclose(np.sum(trans_mtx[:, i]), 1, atol=atol) for i in range(len(trans_mtx))])
@@ -159,7 +196,27 @@ def calc_spectral_gap(trans_mtx, atol=1e-8):
 
     spectral_gap = np.abs(eig_vals[0]) - np.abs(eig_vals[1])
 
-    return spectral_gap, eig_vals
+    # Estimate the uncertainty of the spectral gap
+    spectral_gap_list = []
+    n_performed = 0
+    while n_performed < n_bootstrap:
+        mtx_boot = synthesize_transmtx(trans_mtx)[0]
+        check_row_boot = sum([np.isclose(np.sum(mtx_boot[i]), 1, atol=atol) for i in range(len(mtx_boot))])
+        check_col_boot = sum([np.isclose(np.sum(mtx_boot[:, i]), 1, atol=atol) for i in range(len(mtx_boot))])
+        if check_row_boot == len(mtx_boot):
+            eig_vals_boot, _ = np.linalg.eig(mtx_boot.T)
+        elif check_col_boot == len(mtx_boot):
+            eig_vals_boot, _ = np.linalg.eig(mtx_boot)
+        else:
+            continue
+
+        n_performed += 1
+        eig_vals_boot = np.sort(eig_vals_boot)[::-1]
+        spectral_gap_list.append(np.abs(eig_vals_boot[0]) - np.abs(eig_vals_boot[1]))
+
+    spectral_gap_err = np.std(spectral_gap_list, ddof=1)
+
+    return spectral_gap, spectral_gap_err, eig_vals
 
 
 def split_transmtx(trans_mtx, n_sim, n_sub):
diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index 9c7de5c1..be288f5d 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -694,7 +694,7 @@ def plot_transit_time(trajs, N, fig_prefix=None, dt=None, folder='.'):
                 t_k0 = list(np.array(t_k0) * dt)  # units: ps
                 t_roundtrip = list(np.array(t_roundtrip) * dt)  # units: ps
                 if len(t_0k) + len(t_k0) + len(t_roundtrip) > 0:  # i.e. not all are empty
-                    
+
                     if np.max(list(chain.from_iterable([t_0k, t_k0, t_roundtrip]))) > t_max:
                         t_max = np.max([t_0k, t_k0, t_roundtrip])
 
diff --git a/ensemble_md/tests/test_analyze_traj.py b/ensemble_md/tests/test_analyze_traj.py
index cd3f29dc..86581573 100644
--- a/ensemble_md/tests/test_analyze_traj.py
+++ b/ensemble_md/tests/test_analyze_traj.py
@@ -530,10 +530,10 @@ def test_plot_transit_time(mock_plt):
 
 @patch('ensemble_md.analysis.analyze_traj.plt')
 def test_plot_g_vecs(mock_plt):
-    cmap = mock_plt.cm.ocean
+    # cmap = mock_plt.cm.ocean
     mock_ax = MagicMock()
     mock_plt.gca.return_value = mock_ax
-    
+
     # Case 1: Short g_vecs with refs and with plot_rmse = True
     g_vecs = np.array([[0, 10, 20, 30], [0, 8, 18, 28]])
     refs = np.array([0, 8, 18, 28])
@@ -558,7 +558,6 @@ def test_plot_g_vecs(mock_plt):
     assert mock_plt.ylabel.call_args_list[0][0] == ('Alchemical weight (kT)',)
     assert mock_plt.ylabel.call_args_list[1][0] == ('RMSE in the alchemical weights (kT)',)
 
-
     # Case 2: Long g_vecs
 
 

From 322af8eb5319b4980cf41693d6b7ef029be0ab0f Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Wed, 3 Apr 2024 18:52:10 +0800
Subject: [PATCH 29/41] Added synthesize_data.py to ensemble_md.analysis

---
 ensemble_md/analysis/analyze_matrix.py  |  10 +-
 ensemble_md/analysis/synthesize_data.py | 116 ++++++++++++++++++++++++
 2 files changed, 122 insertions(+), 4 deletions(-)
 create mode 100644 ensemble_md/analysis/synthesize_data.py

diff --git a/ensemble_md/analysis/analyze_matrix.py b/ensemble_md/analysis/analyze_matrix.py
index c3bf665d..ecd3feee 100644
--- a/ensemble_md/analysis/analyze_matrix.py
+++ b/ensemble_md/analysis/analyze_matrix.py
@@ -16,7 +16,6 @@
 from matplotlib import cm
 from ensemble_md.utils.exceptions import ParseError
 from ensemble_md.utils.exceptions import ParameterError
-from ensmeble_md.analysis import analyze_traj
 
 
 def calc_transmtx(log_file, expanded_ensemble=True):
@@ -123,7 +122,7 @@ def calc_equil_prob(trans_mtx):
     return equil_prob
 
 
-def synthesize_transmtx(trans_mtx, n_frames=100000):
+def synthesize_transmtx(trans_mtx, mtx_type='rep', n_frames=100000):
     """
     Synthesizes a mock transition matrix by calculating the underlying equilibrium probability
     of the input transition matrix, synthesizing a trajectory by drawing samples from the equilibrium
@@ -133,6 +132,9 @@ def synthesize_transmtx(trans_mtx, n_frames=100000):
     ----------
     trans_mtx: np.ndarray
         The input transition matrix.
+    mtx_type: str
+        The type of the input transition matrix. It can be either 'rep' (replica-space transition matrix)
+        or 'state' (state-space transition matrix).
     n_frames: int
         The number of frames of the synthesized trajectory from which the mock transition matrix is calculated.
 
@@ -147,8 +149,8 @@ def synthesize_transmtx(trans_mtx, n_frames=100000):
     """
     equil_prob = calc_equil_prob(trans_mtx)
     n_states = len(equil_prob)
-    syn_traj = np.random.choice(n_states, size=n_frames, p=equil_prob)
-    syn_mtx = analyze_traj.traj_to_transmtx(syn_traj, n_states)
+    syn_traj = np.random.choice(n_states, size=n_frames, p=equil_prob.reshape(n_states))
+    syn_mtx = analyze_traj.traj2transmtx(syn_traj, n_states)
     diff_mtx = np.abs(trans_mtx - syn_mtx)
 
     return syn_mtx, syn_traj, diff_mtx
diff --git a/ensemble_md/analysis/synthesize_data.py b/ensemble_md/analysis/synthesize_data.py
new file mode 100644
index 00000000..d2f04e5f
--- /dev/null
+++ b/ensemble_md/analysis/synthesize_data.py
@@ -0,0 +1,116 @@
+####################################################################
+#                                                                  #
+#    ensemble_md,                                                  #
+#    a python package for running GROMACS simulation ensembles     #
+#                                                                  #
+#    Written by Wei-Tse Hsu <wehs7661@colorado.edu>                #
+#    Copyright (c) 2022 University of Colorado Boulder             #
+#                                                                  #
+####################################################################
+"""
+The :obj:`.synthesize_data` module provides methods for synthesizing REXEE data.
+"""
+import numpy as np
+from ensemble_md.analysis import analyze_traj
+from ensemble_md.analysis import analyze_matrix
+
+def synthesize_traj(trans_mtx, n_frames=100000, method='transmtx', start=0, seed=None):
+    """
+    Synthesize a trajectory based on the input transition matrix.
+
+    Parameters
+    ----------
+    trans_mtx: np.ndarray
+        The input transition matrix.
+    n_frames: int
+        The number of frames to be generated. The default value is 100000.
+    method: str
+        The method to be used for trajectory synthesis. It can be either 'transmtx' or 'equil_prob'.
+        The former refers to generating the trajectory by simulating the moves between states based on the
+        input transition matrix, with the trajectory starting from the state specified by the :code:`start` parameter.
+        If the method is :code:`equil_prob`, the trajectory will be generated by simply sampling from the equilibrium
+        probability distribution calculated from the input transition matrix. The method 'transmtx' should
+        generate a trajectory characterized by a transition matrix similar to the input one, while the method
+        'equil_prob' may generate a trajectory that has a significantly different transition matrix. Still,
+        a trajectory generated by either method should have a similar underlying equilibrium probability distribution
+        (hence the spectral gap as well) as the input transition matrix. The default value is 'transmtx'.
+    start: int
+        The starting state of the synthesized trajectory if the method is :code:`transmtx`. The default value is 0,
+        i.e., the first state. This parameter is ignored if the method is :code:`equil_prob`.
+    seed: int
+        The seed for the random number generator. The default value is None, i.e., the seed is not set. 
+    
+    Returns
+    -------
+    syn_traj: np.ndarray
+        The synthesized trajectory.
+    """
+    np.random.seed(seed)  # If seed is None, the seed is not set.
+    N = len(trans_mtx)  # Can be the number of states or replicas depending on the type of the input mtraix
+    if method == 'equil_prob':
+        equil_prob = analyze_traj.calc_equil_prob(trans_mtx)
+        syn_traj = np.random.choice(N, size=n_frames, p=equil_prob.reshape(N))
+    elif method == 'transmtx':
+        syn_traj = np.zeros(n_frames, dtype=int)
+        syn_traj[0] = start
+        for i in range(1, n_frames):
+            syn_traj[i] = np.random.choice(N, p=trans_mtx[syn_traj[i-1]])
+    else:
+        raise ValueError(f'Invalid method: {method}. The method must be either "transmtx" or "equil_prob".')
+
+    return syn_traj
+
+
+def synthesize_transmtx(trans_mtx, mtx_type='rep', n_frames=100000, seed=None):
+    """
+    Synthesizes a normalized transition matrix similar to the input transition matrix by first
+    generating a trajectory using :code:`synthesize_traj` with :code:`method='transmtx'` and then
+    calculating the transition matrix from the synthesized trajectory. 
+    
+    Parameters
+    ----------
+    trans_mtx: np.ndarray
+        The input transition matrix.
+    mtx_type: str
+        The type of the input transition matrix. It can be either 'rep' (replica-space transition matrix)
+        or 'state' (state-space transition matrix).
+    n_frames: int
+        The number of frames of the synthesized trajectory from which the mock transition matrix is calculated.
+        The default value is 100000.
+    seed: int
+        The seed for the random number generator. The default value is None, i.e., the seed is not set. 
+    
+    Returns
+    -------
+    syn_mtx: np.ndarray
+        The synthesized transition matrix.
+    syn_traj: np.ndarray
+        The synthesized trajectory/trajectories from which the transition matrix is calculated. Note that
+        if :code:`mtx_type` is 'rep', this will be a list of trajectories, which represent synthesized
+        replica-space trajectories.
+    diff_mtx: np.ndarray
+        The input transition matrix subtracted by the synthesized transition matrix.
+    """
+    N = len(trans_mtx)  # can be the number of states or number of replicas depending on mtx_type
+    if mtx_type == 'state':
+        # Note that here we just use the default values (method='transmtx' and start=0) for synthesize_traj, so that
+        # the synthesized matrix will be similar to the input one. (If equil_prob is used, the resulting matrix may
+        # be very different from the input one, though the equilibrium probabilities and spectral gap should be similar.)
+        # Note that for transition matrix synthesis, the starting state of the synthesized trajectory
+        # should not matter given that the number of frames is large.
+        syn_traj = synthesize_traj(trans_mtx, n_frames, seed=seed)
+        syn_mtx = analyze_traj.traj2transmtx(syn_traj, N)
+    elif mtx_type == 'rep':
+        rep_trajs = np.array([synthesize_traj(trans_mtx, n_frames, start=i, seed=seed) for i in range(N)])
+        counts = [analyze_traj.traj2transmtx(rep_trajs[i], N, normalize=False) for i in range(len(rep_trajs))]
+        syn_mtx = np.sum(counts, axis=0)
+        syn_mtx /= np.sum(syn_mtx, axis=1)[:, None]
+        syn_traj = rep_trajs
+    else:
+        raise ValueError(f'Invalid mtx_type: {mtx_type}. The mtx_type must be either "rep" or "state".')
+
+    diff_mtx = trans_mtx - syn_mtx
+
+    return syn_mtx, syn_traj, diff_mtx
+
+

From cf3821d078b858394d1df8a89113d9d9e3c406ce Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Wed, 3 Apr 2024 18:55:47 +0800
Subject: [PATCH 30/41] Removed synthesize_transmtx from analyze_matrix.py;
 Simplified synthesize_transmtx in synthesize_data.py

---
 ensemble_md/analysis/analyze_matrix.py  | 34 ------------------------
 ensemble_md/analysis/synthesize_data.py | 35 +++++++------------------
 2 files changed, 10 insertions(+), 59 deletions(-)

diff --git a/ensemble_md/analysis/analyze_matrix.py b/ensemble_md/analysis/analyze_matrix.py
index ecd3feee..64b57d64 100644
--- a/ensemble_md/analysis/analyze_matrix.py
+++ b/ensemble_md/analysis/analyze_matrix.py
@@ -122,40 +122,6 @@ def calc_equil_prob(trans_mtx):
     return equil_prob
 
 
-def synthesize_transmtx(trans_mtx, mtx_type='rep', n_frames=100000):
-    """
-    Synthesizes a mock transition matrix by calculating the underlying equilibrium probability
-    of the input transition matrix, synthesizing a trajectory by drawing samples from the equilibrium
-    distribution, and calculating the transition matrix from the trajectory.
-
-    Parameters
-    ----------
-    trans_mtx: np.ndarray
-        The input transition matrix.
-    mtx_type: str
-        The type of the input transition matrix. It can be either 'rep' (replica-space transition matrix)
-        or 'state' (state-space transition matrix).
-    n_frames: int
-        The number of frames of the synthesized trajectory from which the mock transition matrix is calculated.
-
-    Returns
-    -------
-    syn_mtx: np.ndarray
-        The synthesized transition matrix.
-    syn_traj: np.ndarray
-        The synthesized trajectory.
-    diff_mtx: np.ndarray
-        The absolute difference between the input and synthesized transition matrices.
-    """
-    equil_prob = calc_equil_prob(trans_mtx)
-    n_states = len(equil_prob)
-    syn_traj = np.random.choice(n_states, size=n_frames, p=equil_prob.reshape(n_states))
-    syn_mtx = analyze_traj.traj2transmtx(syn_traj, n_states)
-    diff_mtx = np.abs(trans_mtx - syn_mtx)
-
-    return syn_mtx, syn_traj, diff_mtx
-
-
 def calc_spectral_gap(trans_mtx, atol=1e-8, n_bootstrap=50):
     """
     Calculates the spectral gap of the input transition matrix and estimates its
diff --git a/ensemble_md/analysis/synthesize_data.py b/ensemble_md/analysis/synthesize_data.py
index d2f04e5f..4a485c72 100644
--- a/ensemble_md/analysis/synthesize_data.py
+++ b/ensemble_md/analysis/synthesize_data.py
@@ -61,7 +61,7 @@ def synthesize_traj(trans_mtx, n_frames=100000, method='transmtx', start=0, seed
     return syn_traj
 
 
-def synthesize_transmtx(trans_mtx, mtx_type='rep', n_frames=100000, seed=None):
+def synthesize_transmtx(trans_mtx, n_frames=100000, seed=None):
     """
     Synthesizes a normalized transition matrix similar to the input transition matrix by first
     generating a trajectory using :code:`synthesize_traj` with :code:`method='transmtx'` and then
@@ -71,9 +71,6 @@ def synthesize_transmtx(trans_mtx, mtx_type='rep', n_frames=100000, seed=None):
     ----------
     trans_mtx: np.ndarray
         The input transition matrix.
-    mtx_type: str
-        The type of the input transition matrix. It can be either 'rep' (replica-space transition matrix)
-        or 'state' (state-space transition matrix).
     n_frames: int
         The number of frames of the synthesized trajectory from which the mock transition matrix is calculated.
         The default value is 100000.
@@ -85,32 +82,20 @@ def synthesize_transmtx(trans_mtx, mtx_type='rep', n_frames=100000, seed=None):
     syn_mtx: np.ndarray
         The synthesized transition matrix.
     syn_traj: np.ndarray
-        The synthesized trajectory/trajectories from which the transition matrix is calculated. Note that
-        if :code:`mtx_type` is 'rep', this will be a list of trajectories, which represent synthesized
-        replica-space trajectories.
+        The synthesized trajectory from which the transition matrix is calculated.
     diff_mtx: np.ndarray
         The input transition matrix subtracted by the synthesized transition matrix.
     """
     N = len(trans_mtx)  # can be the number of states or number of replicas depending on mtx_type
-    if mtx_type == 'state':
-        # Note that here we just use the default values (method='transmtx' and start=0) for synthesize_traj, so that
-        # the synthesized matrix will be similar to the input one. (If equil_prob is used, the resulting matrix may
-        # be very different from the input one, though the equilibrium probabilities and spectral gap should be similar.)
-        # Note that for transition matrix synthesis, the starting state of the synthesized trajectory
-        # should not matter given that the number of frames is large.
-        syn_traj = synthesize_traj(trans_mtx, n_frames, seed=seed)
-        syn_mtx = analyze_traj.traj2transmtx(syn_traj, N)
-    elif mtx_type == 'rep':
-        rep_trajs = np.array([synthesize_traj(trans_mtx, n_frames, start=i, seed=seed) for i in range(N)])
-        counts = [analyze_traj.traj2transmtx(rep_trajs[i], N, normalize=False) for i in range(len(rep_trajs))]
-        syn_mtx = np.sum(counts, axis=0)
-        syn_mtx /= np.sum(syn_mtx, axis=1)[:, None]
-        syn_traj = rep_trajs
-    else:
-        raise ValueError(f'Invalid mtx_type: {mtx_type}. The mtx_type must be either "rep" or "state".')
-
+    
+    # Note that here we just use the default values (method='transmtx' and start=0) for synthesize_traj, so that
+    # the synthesized matrix will be similar to the input one. (If equil_prob is used, the resulting matrix may
+    # be very different from the input one, though the equilibrium probabilities and spectral gap should be similar.)
+    # Note that for transition matrix synthesis, the starting state of the synthesized trajectory
+    # should not matter given that the number of frames is large.
+    syn_traj = synthesize_traj(trans_mtx, n_frames, seed=seed)
+    syn_mtx = analyze_traj.traj2transmtx(syn_traj, N)
     diff_mtx = trans_mtx - syn_mtx
 
     return syn_mtx, syn_traj, diff_mtx
 
-

From 9d291e94661998c48446cfe714a916a7f6913bbe Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Thu, 4 Apr 2024 01:05:10 +0800
Subject: [PATCH 31/41] Modified calc_spectral_gap and added calc_t_relax

---
 ensemble_md/analysis/analyze_matrix.py | 33 +++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/ensemble_md/analysis/analyze_matrix.py b/ensemble_md/analysis/analyze_matrix.py
index 64b57d64..c8fc6817 100644
--- a/ensemble_md/analysis/analyze_matrix.py
+++ b/ensemble_md/analysis/analyze_matrix.py
@@ -16,6 +16,7 @@
 from matplotlib import cm
 from ensemble_md.utils.exceptions import ParseError
 from ensemble_md.utils.exceptions import ParameterError
+from ensemble_md.analysis import synthesize_data
 
 
 def calc_transmtx(log_file, expanded_ensemble=True):
@@ -168,7 +169,7 @@ def calc_spectral_gap(trans_mtx, atol=1e-8, n_bootstrap=50):
     spectral_gap_list = []
     n_performed = 0
     while n_performed < n_bootstrap:
-        mtx_boot = synthesize_transmtx(trans_mtx)[0]
+        mtx_boot = synthesize_data.synthesize_transmtx(trans_mtx)[0]
         check_row_boot = sum([np.isclose(np.sum(mtx_boot[i]), 1, atol=atol) for i in range(len(mtx_boot))])
         check_col_boot = sum([np.isclose(np.sum(mtx_boot[:, i]), 1, atol=atol) for i in range(len(mtx_boot))])
         if check_row_boot == len(mtx_boot):
@@ -187,6 +188,36 @@ def calc_spectral_gap(trans_mtx, atol=1e-8, n_bootstrap=50):
     return spectral_gap, spectral_gap_err, eig_vals
 
 
+def calc_t_relax(spectral_gap, exchange_period, spectral_gap_err=None):
+    """
+    Calculates the relaxation time given the spectral gap of a transition matrix of interest.
+    By definition, the relaxation time is equal to the exchange period divided by the spectral gap.
+
+    Parameters
+    ----------
+    spectral_gap: float
+        The input spectral gap.
+    exchange_period : float
+        The exchange period of the simulation in ps.
+    spectral_gap_err : float
+        The uncertainty of the spectral gap, which is used to calculate the uncertainty of the relaxation time using
+        error propagation.
+
+    Returns
+    -------
+    t_relax : float
+        The relaxation time in ps.
+    t_relax_err : float
+        The uncertainty of the relaxation time in ps.
+    """
+    t_relax = exchange_period / spectral_gap
+    t_relax_err = None
+
+    if spectral_gap_err is not None:
+        t_relax_err = exchange_period * spectral_gap_err / spectral_gap ** 2  # error propagation
+
+    return t_relax, t_relax_err
+
 def split_transmtx(trans_mtx, n_sim, n_sub):
     """
     Split the input transition matrix into blocks of smaller matrices corresponding to

From 904dd7b8fdddb4a962cb9864bd05de89cd5bb4e0 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Thu, 4 Apr 2024 02:06:48 +0800
Subject: [PATCH 32/41] Fixed the linting error; Modified calc_spectral_gap

---
 ensemble_md/analysis/analyze_matrix.py  |  7 +++++--
 ensemble_md/analysis/synthesize_data.py | 20 ++++++++++----------
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/ensemble_md/analysis/analyze_matrix.py b/ensemble_md/analysis/analyze_matrix.py
index c8fc6817..5af1ef2f 100644
--- a/ensemble_md/analysis/analyze_matrix.py
+++ b/ensemble_md/analysis/analyze_matrix.py
@@ -123,7 +123,7 @@ def calc_equil_prob(trans_mtx):
     return equil_prob
 
 
-def calc_spectral_gap(trans_mtx, atol=1e-8, n_bootstrap=50):
+def calc_spectral_gap(trans_mtx, atol=1e-8, n_bootstrap=50, bootstrap_seed=None):
     """
     Calculates the spectral gap of the input transition matrix and estimates its
     uncertainty using the bootstrap method.
@@ -136,6 +136,8 @@ def calc_spectral_gap(trans_mtx, atol=1e-8, n_bootstrap=50):
         The absolute tolerance for checking the sum of columns and rows.
     n_bootstrap: int
         The number of bootstrap iterations for uncertainty estimation.
+    bootstrap_seed: int
+        The seed for the random number generator for the bootstrap method.
 
     Returns
     -------
@@ -169,7 +171,7 @@ def calc_spectral_gap(trans_mtx, atol=1e-8, n_bootstrap=50):
     spectral_gap_list = []
     n_performed = 0
     while n_performed < n_bootstrap:
-        mtx_boot = synthesize_data.synthesize_transmtx(trans_mtx)[0]
+        mtx_boot = synthesize_data.synthesize_transmtx(trans_mtx, seed=bootstrap_seed)[0]
         check_row_boot = sum([np.isclose(np.sum(mtx_boot[i]), 1, atol=atol) for i in range(len(mtx_boot))])
         check_col_boot = sum([np.isclose(np.sum(mtx_boot[:, i]), 1, atol=atol) for i in range(len(mtx_boot))])
         if check_row_boot == len(mtx_boot):
@@ -218,6 +220,7 @@ def calc_t_relax(spectral_gap, exchange_period, spectral_gap_err=None):
 
     return t_relax, t_relax_err
 
+
 def split_transmtx(trans_mtx, n_sim, n_sub):
     """
     Split the input transition matrix into blocks of smaller matrices corresponding to
diff --git a/ensemble_md/analysis/synthesize_data.py b/ensemble_md/analysis/synthesize_data.py
index 4a485c72..3ea8cbc7 100644
--- a/ensemble_md/analysis/synthesize_data.py
+++ b/ensemble_md/analysis/synthesize_data.py
@@ -12,7 +12,7 @@
 """
 import numpy as np
 from ensemble_md.analysis import analyze_traj
-from ensemble_md.analysis import analyze_matrix
+
 
 def synthesize_traj(trans_mtx, n_frames=100000, method='transmtx', start=0, seed=None):
     """
@@ -38,14 +38,15 @@ def synthesize_traj(trans_mtx, n_frames=100000, method='transmtx', start=0, seed
         The starting state of the synthesized trajectory if the method is :code:`transmtx`. The default value is 0,
         i.e., the first state. This parameter is ignored if the method is :code:`equil_prob`.
     seed: int
-        The seed for the random number generator. The default value is None, i.e., the seed is not set. 
-    
+        The seed for the random number generator. The default value is None, i.e., the seed is not set.
+
     Returns
     -------
     syn_traj: np.ndarray
         The synthesized trajectory.
     """
-    np.random.seed(seed)  # If seed is None, the seed is not set.
+    if seed is not None:
+        np.random.seed(seed)
     N = len(trans_mtx)  # Can be the number of states or replicas depending on the type of the input mtraix
     if method == 'equil_prob':
         equil_prob = analyze_traj.calc_equil_prob(trans_mtx)
@@ -65,8 +66,8 @@ def synthesize_transmtx(trans_mtx, n_frames=100000, seed=None):
     """
     Synthesizes a normalized transition matrix similar to the input transition matrix by first
     generating a trajectory using :code:`synthesize_traj` with :code:`method='transmtx'` and then
-    calculating the transition matrix from the synthesized trajectory. 
-    
+    calculating the transition matrix from the synthesized trajectory.
+
     Parameters
     ----------
     trans_mtx: np.ndarray
@@ -75,8 +76,8 @@ def synthesize_transmtx(trans_mtx, n_frames=100000, seed=None):
         The number of frames of the synthesized trajectory from which the mock transition matrix is calculated.
         The default value is 100000.
     seed: int
-        The seed for the random number generator. The default value is None, i.e., the seed is not set. 
-    
+        The seed for the random number generator. The default value is None, i.e., the seed is not set.
+
     Returns
     -------
     syn_mtx: np.ndarray
@@ -87,7 +88,7 @@ def synthesize_transmtx(trans_mtx, n_frames=100000, seed=None):
         The input transition matrix subtracted by the synthesized transition matrix.
     """
     N = len(trans_mtx)  # can be the number of states or number of replicas depending on mtx_type
-    
+
     # Note that here we just use the default values (method='transmtx' and start=0) for synthesize_traj, so that
     # the synthesized matrix will be similar to the input one. (If equil_prob is used, the resulting matrix may
     # be very different from the input one, though the equilibrium probabilities and spectral gap should be similar.)
@@ -98,4 +99,3 @@ def synthesize_transmtx(trans_mtx, n_frames=100000, seed=None):
     diff_mtx = trans_mtx - syn_mtx
 
     return syn_mtx, syn_traj, diff_mtx
-

From 7d6320a116eec24ca97113fa40e13a43769720ef Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Sun, 7 Apr 2024 02:11:17 +0800
Subject: [PATCH 33/41] Refined some previously written tests

---
 ensemble_md/analysis/analyze_traj.py     |   6 +-
 ensemble_md/analysis/synthesize_data.py  |  11 +-
 ensemble_md/tests/test_analyze_matrix.py |   8 +-
 ensemble_md/tests/test_analyze_traj.py   | 139 +++++++++++++++++++++--
 4 files changed, 146 insertions(+), 18 deletions(-)

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index be288f5d..25f48e2c 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -753,7 +753,7 @@ def plot_transit_time(trajs, N, fig_prefix=None, dt=None, folder='.'):
             plt.grid()
             plt.legend()
             if fig_prefix is None:
-                plt.savefig(f'{folder}/{fig_names[t]}')
+                plt.savefig(f'{folder}/{fig_names[t]}', dpi=600)
             else:
                 plt.savefig(f'{folder}/{fig_prefix}_{fig_names[t]}', dpi=600)
 
@@ -800,9 +800,9 @@ def plot_g_vecs(g_vecs, refs=None, refs_err=None, plot_rmse=True):
     """
     # n_iter, n_state = g_vecs.shape[0], g_vecs.shape[1]
     g_vecs = np.transpose(g_vecs)
-    n_sim = len(g_vecs)
+    n_states = len(g_vecs)
     cmap = plt.cm.ocean  # other good options are CMRmap, gnuplot, terrain, turbo, brg, etc.
-    colors = [cmap(i) for i in np.arange(n_sim) / n_sim]
+    colors = [cmap(i) for i in np.arange(n_states) / n_states]
     plt.figure()
     for i in range(1, len(g_vecs)):
         if len(g_vecs[0]) < 100:
diff --git a/ensemble_md/analysis/synthesize_data.py b/ensemble_md/analysis/synthesize_data.py
index 3ea8cbc7..cefe53bf 100644
--- a/ensemble_md/analysis/synthesize_data.py
+++ b/ensemble_md/analysis/synthesize_data.py
@@ -52,10 +52,19 @@ def synthesize_traj(trans_mtx, n_frames=100000, method='transmtx', start=0, seed
         equil_prob = analyze_traj.calc_equil_prob(trans_mtx)
         syn_traj = np.random.choice(N, size=n_frames, p=equil_prob.reshape(N))
     elif method == 'transmtx':
+        check_row = sum([np.isclose(np.sum(trans_mtx[i]), 1, atol=1e-8) for i in range(len(trans_mtx))])
+        check_col = sum([np.isclose(np.sum(trans_mtx[:, i]), 1, atol=1e-8) for i in range(len(trans_mtx))])
+        if check_row == N:
+            mtx = trans_mtx
+        elif check_col == N:
+            mtx = trans_mtx.T
+        else:
+            raise ValueError('The input matrix is not normalized')
+
         syn_traj = np.zeros(n_frames, dtype=int)
         syn_traj[0] = start
         for i in range(1, n_frames):
-            syn_traj[i] = np.random.choice(N, p=trans_mtx[syn_traj[i-1]])
+            syn_traj[i] = np.random.choice(N, p=mtx[syn_traj[i-1]])
     else:
         raise ValueError(f'Invalid method: {method}. The method must be either "transmtx" or "equil_prob".')
 
diff --git a/ensemble_md/tests/test_analyze_matrix.py b/ensemble_md/tests/test_analyze_matrix.py
index b532956c..6ec25fcb 100644
--- a/ensemble_md/tests/test_analyze_matrix.py
+++ b/ensemble_md/tests/test_analyze_matrix.py
@@ -90,24 +90,24 @@ def test_calc_equil_prob(capfd):
 def test_calc_spectral_gap(capfd):
     # Case 1 (sanity check): doublly stochastic
     mtx = np.array([[0.5, 0.5], [0.5, 0.5]])
-    s, vals = analyze_matrix.calc_spectral_gap(mtx)
+    s, err, vals = analyze_matrix.calc_spectral_gap(mtx, n_bootstrap=5)
     assert vals[0] == 1
     assert np.isclose(s, 1)
 
     # Case 2: Right stochastic
     mtx = np.array([[0.8, 0.2], [0.3, 0.7]])
-    s, vals = analyze_matrix.calc_spectral_gap(mtx)
+    s, err, vals = analyze_matrix.calc_spectral_gap(mtx, n_bootstrap=5)
     assert vals[0] == 1
     assert s == 0.5
 
     # Case 3: Left stochastic
-    s, vals = analyze_matrix.calc_spectral_gap(mtx.T)
+    s, err, vals = analyze_matrix.calc_spectral_gap(mtx.T, n_bootstrap=5)
     assert vals[0] == 1
     assert s == 0.5
 
     # Case 4: Neither left or right stochastic
     mtx = np.random.rand(3, 3)
-    s = analyze_matrix.calc_spectral_gap(mtx)  # the output should be None
+    s = analyze_matrix.calc_spectral_gap(mtx, n_bootstrap=5)  # the output should be None
     out, err = capfd.readouterr()
     assert s is None
     assert 'The input transition matrix is neither right nor left stochastic' in out
diff --git a/ensemble_md/tests/test_analyze_traj.py b/ensemble_md/tests/test_analyze_traj.py
index 86581573..d6bab343 100644
--- a/ensemble_md/tests/test_analyze_traj.py
+++ b/ensemble_md/tests/test_analyze_traj.py
@@ -87,6 +87,29 @@ def test_convert_npy2xvg():
         assert content[2] == '0.0         4\n'
         assert content[3] == '0.2         6\n'
 
+    os.remove('traj_0.xvg')
+    os.remove('traj_1.xvg')
+
+    trajs = np.array([[0.0, 0.1, 0.2, 0.3], [0.4, 0.5, 0.6, 0.7]])
+    analyze_traj.convert_npy2xvg(trajs, dt, subsampling)
+
+    assert os.path.exists('traj_0.xvg')
+    assert os.path.exists('traj_1.xvg')
+
+    with open('traj_0.xvg', 'r') as f:
+        content = f.readlines()
+        assert content[0] == '# This file was created by ensemble_md\n'
+        assert content[1] == '# Time (ps) v.s. CV\n'
+        assert content[2] == '0.0      0.000000\n'
+        assert content[3] == '0.2      0.200000\n'
+
+    with open('traj_1.xvg', 'r') as f:
+        content = f.readlines()
+        assert content[0] == '# This file was created by ensemble_md\n'
+        assert content[1] == '# Time (ps) v.s. CV\n'
+        assert content[2] == '0.0      0.400000\n'
+        assert content[3] == '0.2      0.600000\n'
+
     os.remove('traj_0.xvg')
     os.remove('traj_1.xvg')
     os.chdir('../../../')
@@ -484,12 +507,16 @@ def test_plot_transit_time(mock_plt):
     np.testing.assert_array_equal(mock_plt.plot.call_args_list[3][0], [[1], [6]])
     np.testing.assert_array_equal(mock_plt.plot.call_args_list[4][0], [[1, 2], [9, 10]])
     np.testing.assert_array_equal(mock_plt.plot.call_args_list[5][0], [[1], [10]])
-    assert mock_plt.plot.call_args_list[0][1] == {'label': 'Trajectory 0', 'marker': 'o'}
-    assert mock_plt.plot.call_args_list[1][1] == {'label': 'Trajectory 1', 'marker': 'o'}
-    assert mock_plt.plot.call_args_list[2][1] == {'label': 'Trajectory 0', 'marker': 'o'}
-    assert mock_plt.plot.call_args_list[3][1] == {'label': 'Trajectory 1', 'marker': 'o'}
-    assert mock_plt.plot.call_args_list[4][1] == {'label': 'Trajectory 0', 'marker': 'o'}
-    assert mock_plt.plot.call_args_list[5][1] == {'label': 'Trajectory 1', 'marker': 'o'}
+
+    assert [mock_plt.plot.call_args_list[i][1] for i in range(6)] == [
+        {'label': 'Trajectory 0', 'marker': 'o'},
+        {'label': 'Trajectory 1', 'marker': 'o'},
+        {'label': 'Trajectory 0', 'marker': 'o'},
+        {'label': 'Trajectory 1', 'marker': 'o'},
+        {'label': 'Trajectory 0', 'marker': 'o'},
+        {'label': 'Trajectory 1', 'marker': 'o'}
+    ]
+
     assert mock_plt.ylabel.call_args_list[0][0] == ('Average transit time from states 0 to k (step)',)
     assert mock_plt.ylabel.call_args_list[1][0] == ('Average transit time from states k to 0 (step)',)
     assert mock_plt.ylabel.call_args_list[2][0] == ('Average round-trip time (step)',)
@@ -527,12 +554,78 @@ def test_plot_transit_time(mock_plt):
     mock_plt.figure.assert_not_called()
     mock_plt.savefig.assert_not_called()
 
+    # Case 5: More than 100 round trips so that a histogram is plotted
+    mock_plt.reset_mock()
+    trajs = np.array([[0, 1, 2, 3, 2] * 20000, [0, 1, 3, 2, 1] * 20000])
+    t_1, t_2, t_3, u = analyze_traj.plot_transit_time(trajs, N)
+
+    assert t_1 == [[3] * 20000, [2] * 20000]
+    assert t_2 == [[2] * 19999, [3] * 19999]
+    assert t_3 == [[5] * 19999, [5] * 19999]
+    assert u == 'step'
+
+    mock_plt.hist.assert_called()
+    mock_plt.ticklabel_format.assert_called_with(style='sci', axis='y', scilimits=(0, 0))
+
+    assert mock_plt.figure.call_count == 6
+    assert mock_plt.hist.call_count == 6
+    assert mock_plt.xlabel.call_count == 6
+    assert mock_plt.ylabel.call_count == 6
+    assert mock_plt.ticklabel_format.call_count == 6
+    assert mock_plt.grid.call_count == 6
+    assert mock_plt.legend.call_count == 6
+    assert mock_plt.savefig.call_count == 6
+
+    assert mock_plt.hist.call_args_list[0][0][0] == [3] * 20000
+    assert mock_plt.hist.call_args_list[1][0][0] == [2] * 20000
+    assert mock_plt.hist.call_args_list[2][0][0] == [2] * 19999
+    assert mock_plt.hist.call_args_list[3][0][0] == [3] * 19999
+    assert mock_plt.hist.call_args_list[4][0][0] == [5] * 19999
+    assert mock_plt.hist.call_args_list[5][0][0] == [5] * 19999
+
+    assert [mock_plt.hist.call_args_list[i][1] for i in range(6)] == [
+        {'bins': 1000, 'label': 'Trajectory 0'},
+        {'bins': 1000, 'label': 'Trajectory 1'},
+        {'bins': 999, 'label': 'Trajectory 0'},
+        {'bins': 999, 'label': 'Trajectory 1'},
+        {'bins': 999, 'label': 'Trajectory 0'},
+        {'bins': 999, 'label': 'Trajectory 1'}
+    ]
+
+    assert [mock_plt.xlabel.call_args_list[i][0][0] for i in range(6)] == [
+        'Event index',
+        'Average transit time from states 0 to k (step)',
+        'Event index',
+        'Average transit time from states k to 0 (step)',
+        'Event index',
+        'Average round-trip time (step)'
+    ]
+
+    assert [mock_plt.ylabel.call_args_list[i][0][0] for i in range(6)] == [
+        'Average transit time from states 0 to k (step)',
+        'Event count',
+        'Average transit time from states k to 0 (step)',
+        'Event count',
+        'Average round-trip time (step)',
+        'Event count'
+    ]
+
+    assert [mock_plt.savefig.call_args_list[i][0][0] for i in range(6)] == [
+        './t_0k.png',
+        './hist_t_0k.png',
+        './t_k0.png',
+        './hist_t_k0.png',
+        './t_roundtrip.png',
+        './hist_t_roundtrip.png'
+    ]
+
 
 @patch('ensemble_md.analysis.analyze_traj.plt')
 def test_plot_g_vecs(mock_plt):
-    # cmap = mock_plt.cm.ocean
+    cmap = mock_plt.cm.ocean
     mock_ax = MagicMock()
     mock_plt.gca.return_value = mock_ax
+    colors = [cmap(i) for i in np.arange(4) / 4]
 
     # Case 1: Short g_vecs with refs and with plot_rmse = True
     g_vecs = np.array([[0, 10, 20, 30], [0, 8, 18, 28]])
@@ -544,21 +637,47 @@ def test_plot_g_vecs(mock_plt):
     mock_plt.figure.assert_called()
     mock_plt.plot.assert_called()
     mock_plt.xlabel.assert_called_with('Iteration index')
-    # mock_plt.ylabel.assert_called_any('Alchemical weight (kT)')
     mock_plt.xlim.assert_called()
     mock_plt.grid.assert_called()
-    mock_plt.legend.assert_called_with(loc='center left', bbox_to_anchor=(1, 0.2))
+    mock_plt.legend.assert_called_once_with(loc='center left', bbox_to_anchor=(1, 0.2))
+    mock_plt.xlabel.assert_called_with('Iteration index')
 
     assert mock_plt.figure.call_count == 2
     assert mock_plt.plot.call_count == 4
     assert mock_plt.axhline.call_count == 3
     assert mock_plt.fill_between.call_count == 3
     assert mock_plt.grid.call_count == 2
+    assert mock_plt.xlabel.call_count == 2
+    assert mock_plt.ylabel.call_count == 2
+
+    assert [mock_plt.plot.call_args_list[i][0][0] for i in range(4)] == [range(2)] * 4
+    np.testing.assert_array_equal(mock_plt.plot.call_args_list[0][0][1], np.array([10, 8]))
+    np.testing.assert_array_equal(mock_plt.plot.call_args_list[1][0][1], np.array([20, 18]))
+    np.testing.assert_array_equal(mock_plt.plot.call_args_list[2][0][1], np.array([30, 28]))
+    np.testing.assert_array_equal(mock_plt.plot.call_args_list[3][0][1], np.array([np.sqrt(3), 0]))  # RMSE as a function of the iteration index  # noqa: E501
+
+    assert mock_plt.plot.call_args_list[0][1] == {'label': 'State 1', 'marker': 'o', 'c': colors[0], 'linewidth': 0.8, 'markersize': 2}  # noqa: E501
+    assert mock_plt.plot.call_args_list[1][1] == {'label': 'State 2', 'marker': 'o', 'c': colors[1], 'linewidth': 0.8, 'markersize': 2}  # noqa: E501
+    assert mock_plt.plot.call_args_list[2][1] == {'label': 'State 3', 'marker': 'o', 'c': colors[2], 'linewidth': 0.8, 'markersize': 2}  # noqa: E501
 
     assert mock_plt.ylabel.call_args_list[0][0] == ('Alchemical weight (kT)',)
     assert mock_plt.ylabel.call_args_list[1][0] == ('RMSE in the alchemical weights (kT)',)
 
-    # Case 2: Long g_vecs
+    # Case 2: Long g_vecs. Here we only check the plot calls, which are made without markers.
+    mock_plt.reset_mock()
+    g_vecs = np.array([[0, 10, 20, 30]] * 200)
+    analyze_traj.plot_g_vecs(g_vecs)
+
+    assert mock_plt.plot.call_count == 3
+    assert [mock_plt.plot.call_args_list[i][0][0] for i in range(3)] == [range(200)] * 3
+
+    np.testing.assert_array_equal(mock_plt.plot.call_args_list[0][0][1], np.array([10] * 200))
+    np.testing.assert_array_equal(mock_plt.plot.call_args_list[1][0][1], np.array([20] * 200))
+    np.testing.assert_array_equal(mock_plt.plot.call_args_list[2][0][1], np.array([30] * 200))
+
+    assert mock_plt.plot.call_args_list[0][1] == {'label': 'State 1', 'c': colors[0], 'linewidth': 0.8}
+    assert mock_plt.plot.call_args_list[1][1] == {'label': 'State 2', 'c': colors[1], 'linewidth': 0.8}
+    assert mock_plt.plot.call_args_list[2][1] == {'label': 'State 3', 'c': colors[2], 'linewidth': 0.8}
 
 
 def test_get_swaps():

From dad30e3f0bf48b6bb5b4895505556162ad37e99b Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Mon, 8 Apr 2024 17:05:11 +0800
Subject: [PATCH 34/41] Added example dhdl files for unit tests

---
 .gitignore                                    |  1 -
 .../sim_0/iteration_0/dhdl.xvg                | 53 +++++++++++++++++++
 .../sim_0/iteration_1/dhdl.xvg                | 53 +++++++++++++++++++
 .../sim_1/iteration_0/dhdl.xvg                | 53 +++++++++++++++++++
 .../sim_1/iteration_1/dhdl.xvg                | 53 +++++++++++++++++++
 .../sim_2/iteration_0/dhdl.xvg                | 53 +++++++++++++++++++
 .../sim_2/iteration_1/dhdl.xvg                | 53 +++++++++++++++++++
 .../sim_3/iteration_0/dhdl.xvg                | 53 +++++++++++++++++++
 .../sim_3/iteration_1/dhdl.xvg                | 53 +++++++++++++++++++
 9 files changed, 424 insertions(+), 1 deletion(-)
 create mode 100644 ensemble_md/tests/data/dhdl/simulation_example/sim_0/iteration_0/dhdl.xvg
 create mode 100644 ensemble_md/tests/data/dhdl/simulation_example/sim_0/iteration_1/dhdl.xvg
 create mode 100644 ensemble_md/tests/data/dhdl/simulation_example/sim_1/iteration_0/dhdl.xvg
 create mode 100644 ensemble_md/tests/data/dhdl/simulation_example/sim_1/iteration_1/dhdl.xvg
 create mode 100644 ensemble_md/tests/data/dhdl/simulation_example/sim_2/iteration_0/dhdl.xvg
 create mode 100644 ensemble_md/tests/data/dhdl/simulation_example/sim_2/iteration_1/dhdl.xvg
 create mode 100644 ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_0/dhdl.xvg
 create mode 100644 ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_1/dhdl.xvg

diff --git a/.gitignore b/.gitignore
index 8072ae5d..f97112b2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,7 +47,6 @@ nosetests.xml
 coverage.xml
 *.cover
 .hypothesis/
-sim_*
 
 # Translations
 *.mo
diff --git a/ensemble_md/tests/data/dhdl/simulation_example/sim_0/iteration_0/dhdl.xvg b/ensemble_md/tests/data/dhdl/simulation_example/sim_0/iteration_0/dhdl.xvg
new file mode 100644
index 00000000..4361bf19
--- /dev/null
+++ b/ensemble_md/tests/data/dhdl/simulation_example/sim_0/iteration_0/dhdl.xvg
@@ -0,0 +1,53 @@
+# This file was created Mon Jun 19 04:08:38 2023
+# Created by:
+#          :-) GROMACS - gmx mdrun, 2022.5-dev-20230428-fdf57150ad (-:
+# 
+# Executable:   /jet/home/wehs7661/pkgs/gromacs/2022.5/bin/gmx
+# Data prefix:  /jet/home/wehs7661/pkgs/gromacs/2022.5
+# Working dir:  /ocean/projects/cts160011p/wehs7661/EEXE_experiments/Anthracene/EEXE/fixed_weight/Redo/test_7/sim_0/iteration_0
+# Command line:
+#   gmx mdrun -s sys_EE.tpr -nt 16 -ntmpi 1
+# gmx mdrun is part of G R O M A C S:
+#
+# Good ROcking Metal Altar for Chronical Sinners
+#
+@    title "dH/d\xl\f{} and \xD\f{}H"
+@    xaxis  label "Time (ps)"
+@    yaxis  label "dH/d\xl\f{} and \xD\f{}H (kJ/mol [\xl\f{}]\S-1\N)"
+@TYPE xy
+@ subtitle "T = 300 (K) "
+@ view 0.15, 0.15, 0.75, 0.85
+@ legend on
+@ legend box on
+@ legend loctype view
+@ legend 0.78, 0.8
+@ legend length 2
+@ s0 legend "Thermodynamic state"
+@ s1 legend "Total Energy (kJ/mol)"
+@ s2 legend "dH/d\xl\f{} vdw-lambda = 0.0000"
+@ s3 legend "\xD\f{}H \xl\f{} to 0.0000"
+@ s4 legend "\xD\f{}H \xl\f{} to 0.1800"
+@ s5 legend "\xD\f{}H \xl\f{} to 0.4200"
+@ s6 legend "\xD\f{}H \xl\f{} to 0.5700"
+@ s7 legend "\xD\f{}H \xl\f{} to 0.6800"
+0.0000    0 -34639.617 83.426201 0.0000000 15.922017 38.982765 53.939058 65.034603
+0.2000    0 -34647.918 84.723366 0.0000000 16.173352 39.565220 54.708024 65.929419
+0.4000    0 -34582.289 76.256203 0.0000000 15.148900 38.068222 53.157510 64.402216
+0.6000    0 -34527.555 91.817261 0.0000000 17.089436 40.979859 56.205354 67.420424
+0.8000    0 -34478.152 44.958286 0.0000000 10.781056 30.639942 44.784307 55.639494
+1.0000    1 -34552.145 74.036446 -10.822186 0.0000000 20.521480 35.175168 46.409437
+1.2000    0 -34500.996 57.441593 0.0000000 12.672887 34.211579 49.085054 60.368809
+1.4000    0 -34486.180 73.066956 0.0000000 14.732327 37.610317 52.915281 64.404465
+1.6000    0 -34489.156 63.685677 0.0000000 13.517728 35.662178 50.761228 62.166708
+1.8000    1 -34485.453 48.051235 -3.2132767 0.0000000 17.011135 31.147446 42.432867
+2.0000    1 -34621.762 70.074333 -9.7361954 0.0000000 19.743919 33.944619 44.832605
+2.2000    0 -34567.770 73.505478 0.0000000 14.755254 37.589237 52.860456 64.327261
+2.4000    0 -34555.969 78.474174 0.0000000 15.282239 38.000729 52.906633 64.013218
+2.6000    0 -34505.848 82.812164 0.0000000 15.868329 38.977410 54.002984 65.160660
+2.8000    0 -34554.223 83.357063 0.0000000 16.061032 39.610695 54.966520 66.381018
+3.0000    0 -34523.898 46.973728 0.0000000 11.358151 32.163078 46.853319 58.073803
+3.2000    1 -34565.973 53.796364 -6.1438644 0.0000000 16.613933 29.401864 39.415336
+3.4000    1 -34676.988 4.8661242 9.6286995 0.0000000 9.9416612 21.194771 30.614606
+3.6000    1 -34607.273 32.110767 0.17498705 0.0000000 13.765884 26.255616 36.437160
+3.8000    1 -34669.711 64.209023 -8.5715912 0.0000000 18.601768 32.326428 42.952045
+4.0000    1 -34713.039 28.580280 0.77232287 0.0000000 12.772979 24.479550 34.015346
diff --git a/ensemble_md/tests/data/dhdl/simulation_example/sim_0/iteration_1/dhdl.xvg b/ensemble_md/tests/data/dhdl/simulation_example/sim_0/iteration_1/dhdl.xvg
new file mode 100644
index 00000000..eae708f8
--- /dev/null
+++ b/ensemble_md/tests/data/dhdl/simulation_example/sim_0/iteration_1/dhdl.xvg
@@ -0,0 +1,53 @@
+# This file was created Mon Jun 19 04:08:41 2023
+# Created by:
+#          :-) GROMACS - gmx mdrun, 2022.5-dev-20230428-fdf57150ad (-:
+# 
+# Executable:   /jet/home/wehs7661/pkgs/gromacs/2022.5/bin/gmx
+# Data prefix:  /jet/home/wehs7661/pkgs/gromacs/2022.5
+# Working dir:  /ocean/projects/cts160011p/wehs7661/EEXE_experiments/Anthracene/EEXE/fixed_weight/Redo/test_7/sim_0/iteration_1
+# Command line:
+#   gmx mdrun -s sys_EE.tpr -nt 16 -ntmpi 1
+# gmx mdrun is part of G R O M A C S:
+#
+# Gyas ROwers Mature At Cryogenic Speed
+#
+@    title "dH/d\xl\f{} and \xD\f{}H"
+@    xaxis  label "Time (ps)"
+@    yaxis  label "dH/d\xl\f{} and \xD\f{}H (kJ/mol [\xl\f{}]\S-1\N)"
+@TYPE xy
+@ subtitle "T = 300 (K) "
+@ view 0.15, 0.15, 0.75, 0.85
+@ legend on
+@ legend box on
+@ legend loctype view
+@ legend 0.78, 0.8
+@ legend length 2
+@ s0 legend "Thermodynamic state"
+@ s1 legend "Total Energy (kJ/mol)"
+@ s2 legend "dH/d\xl\f{} vdw-lambda = 0.1800"
+@ s3 legend "\xD\f{}H \xl\f{} to 0.0000"
+@ s4 legend "\xD\f{}H \xl\f{} to 0.1800"
+@ s5 legend "\xD\f{}H \xl\f{} to 0.4200"
+@ s6 legend "\xD\f{}H \xl\f{} to 0.5700"
+@ s7 legend "\xD\f{}H \xl\f{} to 0.6800"
+4.0000    1 -34813.508 28.745213 0.71526262 0.0000000 12.794391 24.506306 34.044969
+4.2000    1 -34827.828 50.263847 -5.3451660 0.0000000 16.086597 28.817219 38.883647
+4.4000    1 -34846.875 93.496773 -16.187871 0.0000000 23.162265 38.104386 49.167192
+4.6000    2 -34834.480 77.634636 -17.615414 -15.000507 0.0000000 12.504510 22.478638
+4.8000    1 -34787.875 70.183250 -10.633984 0.0000000 19.066112 32.470910 42.703634
+5.0000    1 -34765.312 85.339149 -14.594128 0.0000000 21.308929 35.148270 45.414091
+5.2000    0 -34747.320 77.110146 0.0000000 14.883801 36.773907 51.074174 61.713696
+5.4000    0 -34792.480 87.662727 0.0000000 16.494550 39.923397 54.977533 66.104531
+5.6000    0 -34731.535 85.343506 0.0000000 16.109479 39.085465 53.873529 64.810061
+5.8000    0 -34777.746 60.757465 0.0000000 12.847630 33.688805 47.802530 58.430992
+6.0000    0 -34840.430 76.315674 0.0000000 14.728513 36.393170 50.551549 61.088682
+6.2000    0 -34794.367 68.982430 0.0000000 13.689976 34.570004 48.444696 58.838450
+6.4000    0 -34817.184 64.156715 0.0000000 13.081715 33.568962 47.288068 57.585030
+6.6000    1 -34749.480 38.613789 -2.1158417 0.0000000 14.339462 26.609223 36.492763
+6.8000    2 -34691.605 48.527824 20.546530 -2.4403986 0.0000000 9.3389257 18.089804
+7.0000    2 -34596.328 78.725594 -18.777863 -15.318588 0.0000000 12.689737 22.837187
+7.2000    1 -34691.770 79.540344 -12.828679 0.0000000 20.697006 34.675839 45.190382
+7.4000    0 -34760.391 28.303394 0.0000000 8.4187362 26.263100 39.465459 49.718451
+7.6000    1 -34811.992 41.343948 -3.1597412 0.0000000 14.638515 26.959161 36.876424
+7.8000    1 -34708.434 55.983002 -5.7776459 0.0000000 17.697364 31.443112 42.209058
+8.0000    1 -34607.914 47.309258 -3.8846824 0.0000000 15.949348 28.791382 38.931684
diff --git a/ensemble_md/tests/data/dhdl/simulation_example/sim_1/iteration_0/dhdl.xvg b/ensemble_md/tests/data/dhdl/simulation_example/sim_1/iteration_0/dhdl.xvg
new file mode 100644
index 00000000..3c5835cc
--- /dev/null
+++ b/ensemble_md/tests/data/dhdl/simulation_example/sim_1/iteration_0/dhdl.xvg
@@ -0,0 +1,53 @@
+# This file was created Mon Jun 19 04:08:38 2023
+# Created by:
+#          :-) GROMACS - gmx mdrun, 2022.5-dev-20230428-fdf57150ad (-:
+# 
+# Executable:   /jet/home/wehs7661/pkgs/gromacs/2022.5/bin/gmx
+# Data prefix:  /jet/home/wehs7661/pkgs/gromacs/2022.5
+# Working dir:  /ocean/projects/cts160011p/wehs7661/EEXE_experiments/Anthracene/EEXE/fixed_weight/Redo/test_7/sim_1/iteration_0
+# Command line:
+#   gmx mdrun -s sys_EE.tpr -nt 16 -ntmpi 1
+# gmx mdrun is part of G R O M A C S:
+#
+# Good ROcking Metal Altar for Chronical Sinners
+#
+@    title "dH/d\xl\f{} and \xD\f{}H"
+@    xaxis  label "Time (ps)"
+@    yaxis  label "dH/d\xl\f{} and \xD\f{}H (kJ/mol [\xl\f{}]\S-1\N)"
+@TYPE xy
+@ subtitle "T = 300 (K) "
+@ view 0.15, 0.15, 0.75, 0.85
+@ legend on
+@ legend box on
+@ legend loctype view
+@ legend 0.78, 0.8
+@ legend length 2
+@ s0 legend "Thermodynamic state"
+@ s1 legend "Total Energy (kJ/mol)"
+@ s2 legend "dH/d\xl\f{} vdw-lambda = 0.1800"
+@ s3 legend "\xD\f{}H \xl\f{} to 0.1800"
+@ s4 legend "\xD\f{}H \xl\f{} to 0.4200"
+@ s5 legend "\xD\f{}H \xl\f{} to 0.5700"
+@ s6 legend "\xD\f{}H \xl\f{} to 0.6800"
+@ s7 legend "\xD\f{}H \xl\f{} to 0.7600"
+0.0000    0 -34443.301 92.597755 0.0000000 23.060748 38.017041 49.112586 57.216179
+0.2000    0 -34545.355 29.916248 0.0000000 13.415580 25.841950 36.032550 43.827893
+0.4000    1 -34573.512 19.315325 10.328051 0.0000000 6.2289556 13.864626 20.475705
+0.6000    2 -34537.039 46.042034 34.746619 -2.3416570 0.0000000 6.7094276 13.178810
+0.8000    2 -34504.797 26.582932 92.888954 2.9817397 0.0000000 5.1950481 11.060175
+1.0000    2 -34597.062 76.209625 -6.8851439 -9.2199948 0.0000000 9.2225502 16.762705
+1.2000    1 -34645.754 59.870655 -5.9310907 0.0000000 10.951455 20.831724 28.667682
+1.4000    1 -34608.500 83.173462 -15.765495 0.0000000 13.482564 24.320257 32.536224
+1.6000    0 -34702.449 91.427101 0.0000000 23.076068 38.237809 49.537286 57.808264
+1.8000    0 -34636.801 83.105667 0.0000000 21.617057 36.200329 47.160947 55.216149
+2.0000    0 -34582.664 77.326607 0.0000000 20.710449 35.104649 46.053064 54.150658
+2.2000    0 -34621.633 72.612846 0.0000000 20.371620 35.052903 46.339511 54.727704
+2.4000    0 -34697.633 22.681007 0.0000000 13.298198 26.286240 36.942733 45.078204
+2.6000    1 -34675.859 26.216412 7.9667612 0.0000000 7.0089467 14.935248 21.638223
+2.8000    2 -34693.883 68.969444 -4.9065520 -8.3384412 0.0000000 8.3217972 15.088121
+3.0000    1 -34830.324 83.215111 -17.703334 0.0000000 13.053636 23.174696 30.730685
+3.2000    0 -34759.859 78.371361 0.0000000 20.026790 33.373765 43.385355 50.741192
+3.4000    0 -34852.457 72.025925 0.0000000 19.205421 32.473984 42.536655 49.967038
+3.6000    0 -34892.082 63.032909 0.0000000 17.366710 29.668915 39.065662 46.026845
+3.8000    0 -34862.301 78.490425 0.0000000 19.984802 33.251214 43.185771 50.478333
+4.0000    1 -34712.207 29.889591 3.8167387 0.0000000 7.0435212 14.620117 20.987546
diff --git a/ensemble_md/tests/data/dhdl/simulation_example/sim_1/iteration_1/dhdl.xvg b/ensemble_md/tests/data/dhdl/simulation_example/sim_1/iteration_1/dhdl.xvg
new file mode 100644
index 00000000..511ce9bd
--- /dev/null
+++ b/ensemble_md/tests/data/dhdl/simulation_example/sim_1/iteration_1/dhdl.xvg
@@ -0,0 +1,53 @@
+# This file was created Mon Jun 19 04:08:41 2023
+# Created by:
+#          :-) GROMACS - gmx mdrun, 2022.5-dev-20230428-fdf57150ad (-:
+# 
+# Executable:   /jet/home/wehs7661/pkgs/gromacs/2022.5/bin/gmx
+# Data prefix:  /jet/home/wehs7661/pkgs/gromacs/2022.5
+# Working dir:  /ocean/projects/cts160011p/wehs7661/EEXE_experiments/Anthracene/EEXE/fixed_weight/Redo/test_7/sim_1/iteration_1
+# Command line:
+#   gmx mdrun -s sys_EE.tpr -nt 16 -ntmpi 1
+# gmx mdrun is part of G R O M A C S:
+#
+# Gyas ROwers Mature At Cryogenic Speed
+#
+@    title "dH/d\xl\f{} and \xD\f{}H"
+@    xaxis  label "Time (ps)"
+@    yaxis  label "dH/d\xl\f{} and \xD\f{}H (kJ/mol [\xl\f{}]\S-1\N)"
+@TYPE xy
+@ subtitle "T = 300 (K) "
+@ view 0.15, 0.15, 0.75, 0.85
+@ legend on
+@ legend box on
+@ legend loctype view
+@ legend 0.78, 0.8
+@ legend length 2
+@ s0 legend "Thermodynamic state"
+@ s1 legend "Total Energy (kJ/mol)"
+@ s2 legend "dH/d\xl\f{} vdw-lambda = 0.4200"
+@ s3 legend "\xD\f{}H \xl\f{} to 0.1800"
+@ s4 legend "\xD\f{}H \xl\f{} to 0.4200"
+@ s5 legend "\xD\f{}H \xl\f{} to 0.5700"
+@ s6 legend "\xD\f{}H \xl\f{} to 0.6800"
+@ s7 legend "\xD\f{}H \xl\f{} to 0.7600"
+4.0000    1 -34536.086 29.795559 3.8372179 0.0000000 7.0307286 14.599871 20.962844
+4.2000    1 -34534.352 70.381546 -12.581901 0.0000000 11.597162 21.089125 28.343406
+4.4000    0 -34557.211 90.596390 0.0000000 22.733631 37.597237 48.659818 56.753277
+4.6000    0 -34613.070 52.785580 0.0000000 17.286594 30.684376 41.078763 48.817528
+4.8000    0 -34590.133 69.891281 0.0000000 19.031192 32.354848 42.482189 49.964215
+5.0000    0 -34641.594 82.358505 0.0000000 21.104812 35.207131 45.793859 53.574639
+5.2000    0 -34630.590 55.930111 0.0000000 16.972682 29.901005 39.998634 47.558106
+5.4000    1 -34501.445 6.8057194 15.179602 0.0000000 4.5338624 11.008780 16.768989
+5.6000    2 -34446.824 70.026398 -11.688033 -8.9736093 0.0000000 8.2860802 14.891584
+5.8000    1 -34374.215 70.087471 -12.557361 0.0000000 11.549276 21.004727 28.232688
+6.0000    0 -34414.961 55.208706 0.0000000 16.862380 29.769925 39.868238 47.434685
+6.2000    0 -34362.062 62.974998 0.0000000 17.749442 30.536915 40.350191 47.634520
+6.4000    0 -34381.938 76.328827 0.0000000 19.696178 32.891114 42.786216 50.050915
+6.6000    0 -34421.820 81.035713 0.0000000 20.691532 34.455193 44.765049 52.332781
+6.8000    1 -34386.133 41.569485 1.4271513 0.0000000 8.6449559 17.158459 24.080119
+7.0000    2 -34381.691 38.757725 25.724690 -2.0148271 0.0000000 5.6739300 11.184824
+7.2000    2 -34411.191 48.626472 17.536701 -3.8829282 0.0000000 6.6149300 12.666441
+7.4000    2 -34361.844 62.080593 8.3049233 -6.4965604 0.0000000 7.8353584 14.499807
+7.6000    1 -34403.656 58.679588 -7.0876688 0.0000000 10.402639 19.518343 26.663846
+7.8000    1 -34488.086 86.682571 -18.630245 0.0000000 13.533948 23.959849 31.715451
+8.0000    0 -34470.477 76.718300 0.0000000 20.077491 33.722607 44.014799 51.594773
diff --git a/ensemble_md/tests/data/dhdl/simulation_example/sim_2/iteration_0/dhdl.xvg b/ensemble_md/tests/data/dhdl/simulation_example/sim_2/iteration_0/dhdl.xvg
new file mode 100644
index 00000000..86d89b76
--- /dev/null
+++ b/ensemble_md/tests/data/dhdl/simulation_example/sim_2/iteration_0/dhdl.xvg
@@ -0,0 +1,53 @@
+# This file was created Mon Jun 19 04:08:38 2023
+# Created by:
+#          :-) GROMACS - gmx mdrun, 2022.5-dev-20230428-fdf57150ad (-:
+# 
+# Executable:   /jet/home/wehs7661/pkgs/gromacs/2022.5/bin/gmx
+# Data prefix:  /jet/home/wehs7661/pkgs/gromacs/2022.5
+# Working dir:  /ocean/projects/cts160011p/wehs7661/EEXE_experiments/Anthracene/EEXE/fixed_weight/Redo/test_7/sim_2/iteration_0
+# Command line:
+#   gmx mdrun -s sys_EE.tpr -nt 16 -ntmpi 1
+# gmx mdrun is part of G R O M A C S:
+#
+# Good ROcking Metal Altar for Chronical Sinners
+#
+@    title "dH/d\xl\f{} and \xD\f{}H"
+@    xaxis  label "Time (ps)"
+@    yaxis  label "dH/d\xl\f{} and \xD\f{}H (kJ/mol [\xl\f{}]\S-1\N)"
+@TYPE xy
+@ subtitle "T = 300 (K) "
+@ view 0.15, 0.15, 0.75, 0.85
+@ legend on
+@ legend box on
+@ legend loctype view
+@ legend 0.78, 0.8
+@ legend length 2
+@ s0 legend "Thermodynamic state"
+@ s1 legend "Total Energy (kJ/mol)"
+@ s2 legend "dH/d\xl\f{} vdw-lambda = 0.4200"
+@ s3 legend "\xD\f{}H \xl\f{} to 0.4200"
+@ s4 legend "\xD\f{}H \xl\f{} to 0.5700"
+@ s5 legend "\xD\f{}H \xl\f{} to 0.6800"
+@ s6 legend "\xD\f{}H \xl\f{} to 0.7600"
+@ s7 legend "\xD\f{}H \xl\f{} to 0.8600"
+0.0000    0 -34558.027 98.723808 0.0000000 14.956292 26.051838 34.155431 44.299574
+0.2000    0 -34543.445 70.217529 0.0000000 11.635145 21.194529 28.502587 37.930287
+0.4000    0 -34623.500 83.901581 0.0000000 13.246080 23.561966 31.264409 41.048299
+0.6000    0 -34627.387 66.694382 0.0000000 11.292428 20.795921 28.142024 37.689766
+0.8000    0 -34630.051 92.523865 0.0000000 14.367456 25.361576 33.513044 43.823331
+1.0000    0 -34604.461 77.432487 0.0000000 12.694188 23.044100 30.948182 41.147965
+1.2000    0 -34585.180 30.772110 0.0000000 7.4891599 15.652910 22.522199 31.890009
+1.4000    0 -34688.898 19.479511 0.0000000 6.4550985 14.318562 21.071363 30.352739
+1.6000    0 -34791.102 61.715427 0.0000000 10.889390 20.408228 27.870702 37.650884
+1.8000    0 -34676.934 64.374435 0.0000000 11.169267 20.797165 28.311511 38.136722
+2.0000    0 -34596.297 38.328201 0.0000000 8.2259496 16.541449 23.376210 32.573317
+2.2000    0 -34469.406 14.627542 0.0000000 6.3683599 14.677107 21.891582 31.842276
+2.4000    1 -34424.031 24.925966 1.6117064 0.0000000 4.7275017 10.117052 18.443624
+2.6000    1 -34490.535 48.790497 -3.3173612 0.0000000 6.8010014 13.130161 22.137404
+2.8000    0 -34604.250 37.173370 0.0000000 8.4756673 17.254763 24.480949 34.191385
+3.0000    0 -34688.332 56.605885 0.0000000 10.687032 20.527428 28.366323 38.726083
+3.2000    0 -34625.945 30.744106 0.0000000 7.8063062 16.408259 23.631595 33.453489
+3.4000    0 -34757.172 -11.770284 0.0000000 3.1281713 9.8574830 16.255677 25.496659
+3.6000    1 -34639.828 50.390167 -3.8163579 0.0000000 6.9351967 13.346870 22.472350
+3.8000    1 -34707.344 8.1764107 5.9796931 0.0000000 3.4317234 8.3482168 16.393526
+4.0000    1 -34779.789 24.187702 2.0787799 0.0000000 4.7622345 10.279852 18.835546
diff --git a/ensemble_md/tests/data/dhdl/simulation_example/sim_2/iteration_1/dhdl.xvg b/ensemble_md/tests/data/dhdl/simulation_example/sim_2/iteration_1/dhdl.xvg
new file mode 100644
index 00000000..9e1f095f
--- /dev/null
+++ b/ensemble_md/tests/data/dhdl/simulation_example/sim_2/iteration_1/dhdl.xvg
@@ -0,0 +1,53 @@
+# This file was created Mon Jun 19 04:08:41 2023
+# Created by:
+#          :-) GROMACS - gmx mdrun, 2022.5-dev-20230428-fdf57150ad (-:
+# 
+# Executable:   /jet/home/wehs7661/pkgs/gromacs/2022.5/bin/gmx
+# Data prefix:  /jet/home/wehs7661/pkgs/gromacs/2022.5
+# Working dir:  /ocean/projects/cts160011p/wehs7661/EEXE_experiments/Anthracene/EEXE/fixed_weight/Redo/test_7/sim_2/iteration_1
+# Command line:
+#   gmx mdrun -s sys_EE.tpr -nt 16 -ntmpi 1
+# gmx mdrun is part of G R O M A C S:
+#
+# Gyas ROwers Mature At Cryogenic Speed
+#
+@    title "dH/d\xl\f{} and \xD\f{}H"
+@    xaxis  label "Time (ps)"
+@    yaxis  label "dH/d\xl\f{} and \xD\f{}H (kJ/mol [\xl\f{}]\S-1\N)"
+@TYPE xy
+@ subtitle "T = 300 (K) "
+@ view 0.15, 0.15, 0.75, 0.85
+@ legend on
+@ legend box on
+@ legend loctype view
+@ legend 0.78, 0.8
+@ legend length 2
+@ s0 legend "Thermodynamic state"
+@ s1 legend "Total Energy (kJ/mol)"
+@ s2 legend "dH/d\xl\f{} vdw-lambda = 0.5700"
+@ s3 legend "\xD\f{}H \xl\f{} to 0.4200"
+@ s4 legend "\xD\f{}H \xl\f{} to 0.5700"
+@ s5 legend "\xD\f{}H \xl\f{} to 0.6800"
+@ s6 legend "\xD\f{}H \xl\f{} to 0.7600"
+@ s7 legend "\xD\f{}H \xl\f{} to 0.8600"
+4.0000    1 -34567.148 24.147297 2.0943858 0.0000000 4.7605675 10.279325 18.837947
+4.2000    1 -34634.715 25.482727 2.0090625 0.0000000 4.9374722 10.613321 19.394332
+4.4000    1 -34540.645 38.114258 -0.88447505 0.0000000 5.9480980 11.978592 20.880584
+4.6000    1 -34459.148 44.981007 -2.2873872 0.0000000 6.5157154 12.759378 21.755325
+4.8000    1 -34330.551 40.923508 -2.0198568 0.0000000 6.0191505 11.884540 20.425304
+5.0000    1 -34426.164 28.420189 0.94379490 0.0000000 5.0308674 10.545742 18.932281
+5.2000    1 -34406.238 62.352551 -6.1328710 0.0000000 8.0506752 15.071867 24.788060
+5.4000    0 -34405.633 -18.816635 0.0000000 2.2137654 8.4111539 14.472705 23.335853
+5.6000    1 -34419.762 68.732956 -8.1613286 0.0000000 8.3816784 15.294836 24.610726
+5.8000    0 -34260.867 22.692657 0.0000000 6.5395864 14.227537 20.818780 29.892428
+6.0000    1 -34340.242 35.485474 -0.17342121 0.0000000 5.7375496 11.675136 20.493255
+6.2000    1 -34358.496 40.092739 -1.3969319 0.0000000 6.0735024 12.100450 20.917737
+6.4000    1 -34388.797 71.700455 -8.2754415 0.0000000 8.7845638 16.041797 25.802079
+6.6000    0 -34391.273 56.747177 0.0000000 10.239751 19.346224 26.519370 35.939476
+6.8000    0 -34384.961 20.601608 0.0000000 6.2730573 13.754404 20.169459 28.989816
+7.0000    1 -34300.277 12.316678 4.4199054 0.0000000 3.5052517 8.0901812 15.400408
+7.2000    2 -34253.223 19.443687 18.318740 0.90864009 0.0000000 2.7357073 8.7057779
+7.4000    1 -34315.672 26.482738 1.0465266 0.0000000 4.7091492 9.8662089 17.687528
+7.6000    1 -34372.086 84.288895 -11.395139 0.0000000 9.7741821 17.396818 27.364657
+7.8000    0 -34435.820 -5.8192053 0.0000000 3.4711755 9.8730909 15.798417 24.229875
+8.0000    1 -34409.645 53.422535 -4.8654454 0.0000000 6.9741848 13.091297 21.551518
diff --git a/ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_0/dhdl.xvg b/ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_0/dhdl.xvg
new file mode 100644
index 00000000..a8584f3f
--- /dev/null
+++ b/ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_0/dhdl.xvg
@@ -0,0 +1,53 @@
+# This file was created Mon Jun 19 04:08:38 2023
+# Created by:
+#          :-) GROMACS - gmx mdrun, 2022.5-dev-20230428-fdf57150ad (-:
+# 
+# Executable:   /jet/home/wehs7661/pkgs/gromacs/2022.5/bin/gmx
+# Data prefix:  /jet/home/wehs7661/pkgs/gromacs/2022.5
+# Working dir:  /ocean/projects/cts160011p/wehs7661/EEXE_experiments/Anthracene/EEXE/fixed_weight/Redo/test_7/sim_3/iteration_0
+# Command line:
+#   gmx mdrun -s sys_EE.tpr -nt 16 -ntmpi 1
+# gmx mdrun is part of G R O M A C S:
+#
+# Good ROcking Metal Altar for Chronical Sinners
+#
+@    title "dH/d\xl\f{} and \xD\f{}H"
+@    xaxis  label "Time (ps)"
+@    yaxis  label "dH/d\xl\f{} and \xD\f{}H (kJ/mol [\xl\f{}]\S-1\N)"
+@TYPE xy
+@ subtitle "T = 300 (K) "
+@ view 0.15, 0.15, 0.75, 0.85
+@ legend on
+@ legend box on
+@ legend loctype view
+@ legend 0.78, 0.8
+@ legend length 2
+@ s0 legend "Thermodynamic state"
+@ s1 legend "Total Energy (kJ/mol)"
+@ s2 legend "dH/d\xl\f{} vdw-lambda = 0.5700"
+@ s3 legend "\xD\f{}H \xl\f{} to 0.5700"
+@ s4 legend "\xD\f{}H \xl\f{} to 0.6800"
+@ s5 legend "\xD\f{}H \xl\f{} to 0.7600"
+@ s6 legend "\xD\f{}H \xl\f{} to 0.8600"
+@ s7 legend "\xD\f{}H \xl\f{} to 1.0000"
+0.0000    0 -34540.129 100.49866 0.0000000 11.095546 19.199138 29.343281 43.531656
+0.2000    0 -34590.309 49.700745 0.0000000 6.7280205 12.841949 21.453620 34.696964
+0.4000    0 -34513.477 84.650406 0.0000000 9.9029091 17.704252 27.961602 42.968847
+0.6000    0 -34491.766 52.951572 0.0000000 7.2474392 13.877050 23.225322 37.598627
+0.8000    0 -34446.789 42.704487 0.0000000 6.3417538 12.558632 21.625589 35.942901
+1.0000    0 -34484.246 50.779144 0.0000000 6.9183300 13.275133 22.316495 36.368677
+1.2000    0 -34445.914 -116.46031 0.0000000 -6.8484656 -6.4292189 -1.9697713 8.8258229
+1.4000    2 -34569.938 1.7394466 14.937879 2.0423865 0.0000000 2.7285307 12.627184
+1.6000    1 -34620.133 -19.742035 7.1241276 0.0000000 0.32980730 4.8585365 16.070840
+1.8000    1 -34540.477 -38.321430 9.6851899 0.0000000 -0.98418234 2.2269476 11.886823
+2.0000    2 -34555.801 6.7902451 10.522247 1.1994390 0.0000000 2.7879675 11.873910
+2.2000    1 -34602.969 -2.0144739 4.0914947 0.0000000 1.3583718 6.3750465 17.403891
+2.4000    1 -34529.535 -74.257820 15.282469 0.0000000 -3.2883553 -1.8125488 6.6320345
+2.6000    2 -34545.094 21.642651 6.6418168 -0.16101076 0.0000000 4.0425174 14.258680
+2.8000    1 -34501.172 -82.870224 16.243625 0.0000000 -3.9193542 -3.0356102 4.9325833
+3.0000    2 -34537.199 -27.186066 23.765002 4.8240393 0.0000000 0.36713911 8.1736953
+3.2000    2 -34538.555 -98.908852 46.613995 11.878667 0.0000000 -5.2634926 -1.6672953
+3.4000    2 -34557.547 -71.023743 37.677826 9.1292776 0.0000000 -3.0791920 2.1466351
+3.6000    1 -34509.117 -88.404030 17.099763 0.0000000 -4.2454881 -3.4895723 4.6760752
+3.8000    2 -34607.887 -48.622986 31.586461 7.0796804 0.0000000 -1.1343806 6.1275737
+4.0000    1 -34547.109 -86.272667 17.281981 0.0000000 -4.0004194 -2.8993087 5.5865369
diff --git a/ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_1/dhdl.xvg b/ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_1/dhdl.xvg
new file mode 100644
index 00000000..bac97f25
--- /dev/null
+++ b/ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_1/dhdl.xvg
@@ -0,0 +1,53 @@
+# This file was created Mon Jun 19 04:08:41 2023
+# Created by:
+#          :-) GROMACS - gmx mdrun, 2022.5-dev-20230428-fdf57150ad (-:
+# 
+# Executable:   /jet/home/wehs7661/pkgs/gromacs/2022.5/bin/gmx
+# Data prefix:  /jet/home/wehs7661/pkgs/gromacs/2022.5
+# Working dir:  /ocean/projects/cts160011p/wehs7661/EEXE_experiments/Anthracene/EEXE/fixed_weight/Redo/test_7/sim_3/iteration_1
+# Command line:
+#   gmx mdrun -s sys_EE.tpr -nt 16 -ntmpi 1
+# gmx mdrun is part of G R O M A C S:
+#
+# Gyas ROwers Mature At Cryogenic Speed
+#
+@    title "dH/d\xl\f{} and \xD\f{}H"
+@    xaxis  label "Time (ps)"
+@    yaxis  label "dH/d\xl\f{} and \xD\f{}H (kJ/mol [\xl\f{}]\S-1\N)"
+@TYPE xy
+@ subtitle "T = 300 (K) "
+@ view 0.15, 0.15, 0.75, 0.85
+@ legend on
+@ legend box on
+@ legend loctype view
+@ legend 0.78, 0.8
+@ legend length 2
+@ s0 legend "Thermodynamic state"
+@ s1 legend "Total Energy (kJ/mol)"
+@ s2 legend "dH/d\xl\f{} vdw-lambda = 0.6800"
+@ s3 legend "\xD\f{}H \xl\f{} to 0.5700"
+@ s4 legend "\xD\f{}H \xl\f{} to 0.6800"
+@ s5 legend "\xD\f{}H \xl\f{} to 0.7600"
+@ s6 legend "\xD\f{}H \xl\f{} to 0.8600"
+@ s7 legend "\xD\f{}H \xl\f{} to 1.0000"
+4.0000    1 -34865.773 -86.368622 17.306136 0.0000000 -4.0043254 -2.9020731 5.5888121
+4.2000    1 -34902.734 -30.364037 8.0715734 0.0000000 -0.54762076 2.9114692 12.817898
+4.4000    2 -34882.805 -3.4873972 14.974183 2.3688454 0.0000000 2.1489256 11.210299
+4.6000    1 -34813.523 -115.05903 21.017043 0.0000000 -6.0217547 -6.7492015 0.16403219
+4.8000    2 -34797.430 -63.292355 35.552286 8.4262785 0.0000000 -2.4021796 3.5290747
+5.0000    2 -34714.504 11.299449 10.734492 0.95961855 0.0000000 3.3388088 13.328189
+5.2000    1 -34667.297 -103.44635 20.001089 0.0000000 -5.0730867 -4.6959488 3.4382302
+5.4000    2 -34616.023 -101.13643 49.237663 12.302009 0.0000000 -5.2862215 -1.4572814
+5.6000    2 -34592.965 -41.386707 27.747991 6.1496372 0.0000000 -0.82253956 5.8893497
+5.8000    2 -34512.500 -72.215378 36.896324 9.1032350 0.0000000 -3.3137383 1.3691077
+6.0000    1 -34509.883 -250.14395 41.887876 0.0000000 -14.738578 -22.203424 -20.435140
+6.2000    2 -34482.641 -50.894749 31.380619 7.1364438 0.0000000 -1.5505838 4.5748667
+6.4000    2 -34566.047 -8.7048426 15.616304 2.7395565 0.0000000 1.5813048 9.7713534
+6.6000    1 -34601.863 -28.744118 7.9427849 0.0000000 -0.44202898 3.0331333 12.708274
+6.8000    2 -34692.965 -11.389771 17.797569 3.1337442 0.0000000 1.4477675 9.5879600
+7.0000    1 -34825.090 -94.880440 17.997850 0.0000000 -4.7396529 -4.6190333 2.5164022
+7.2000    2 -34802.016 -14.258325 18.661376 3.4366216 0.0000000 1.2802632 9.5486924
+7.4000    1 -34790.531 -108.75335 20.267333 0.0000000 -5.5707280 -5.8981857 1.2931432
+7.6000    2 -34832.719 -51.069721 31.479154 7.2031650 0.0000000 -1.4504411 5.2328593
+7.8000    2 -34783.875 -158.10257 67.920695 17.964928 0.0000000 -9.7161848 -9.1541055
+8.0000    3 -34793.789 -77.359856 103.87485 38.248480 14.137779 0.0000000 -2.3299037

From 8376dcdc26878ec57a292bf7f2a651882cab9484 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Mon, 8 Apr 2024 18:09:07 +0800
Subject: [PATCH 35/41] Added new data for unit tests; Tweaked
 stitch_time_series and added its unit test

---
 ensemble_md/analysis/analyze_traj.py          |  21 ++---
 .../sim_0/iteration_2/dhdl.xvg                |  53 ++++++++++++
 .../sim_1/iteration_2/dhdl.xvg                |  53 ++++++++++++
 .../sim_2/iteration_2/dhdl.xvg                |  53 ++++++++++++
 .../sim_3/iteration_2/.dhdl.xvg.swp           | Bin 0 -> 12288 bytes
 .../sim_3/iteration_2/dhdl.xvg                |  53 ++++++++++++
 ensemble_md/tests/test_analyze_traj.py        |  80 ++++++++++++------
 7 files changed, 271 insertions(+), 42 deletions(-)
 create mode 100644 ensemble_md/tests/data/dhdl/simulation_example/sim_0/iteration_2/dhdl.xvg
 create mode 100644 ensemble_md/tests/data/dhdl/simulation_example/sim_1/iteration_2/dhdl.xvg
 create mode 100644 ensemble_md/tests/data/dhdl/simulation_example/sim_2/iteration_2/dhdl.xvg
 create mode 100644 ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_2/.dhdl.xvg.swp
 create mode 100644 ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_2/dhdl.xvg

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index 25f48e2c..e123895a 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -94,32 +94,21 @@ def stitch_time_series(files, rep_trajs, shifts=None, dhdl=True, col_idx=-1, sav
             files_sorted[i].append(files[rep_trajs[i][j]][j])
 
     # Then, stitch the trajectories for each starting configuration
+    # Unlike stitch_time_series_for_sim, there is no way to check the continuity.
     trajs = [[] for i in range(n_configs)]  # for each starting configuration
-    t_last, val_last = None, None    # just for checking the continuity of the trajectory
     for i in range(n_configs):
         for j in range(n_iter):
             if dhdl:
-                traj, t = extract_state_traj(files_sorted[i][j])
+                traj, _ = extract_state_traj(files_sorted[i][j])
             else:
                 traj = np.loadtxt(files_sorted[i][j], comments=['#', '@'])[:, col_idx]
-                t = np.loadtxt(files_sorted[i][j], comments=['#', '@'])[:, 0]
 
             # Shift the indices so that global indices are used.
             shift_idx = rep_trajs[i][j]
             traj = list(np.array(traj) + shifts[shift_idx])
 
-            if j != 0:
-                # Check the continuity of the trajectory
-                if traj[0] != val_last or t[0] != t_last:
-                    err_str = f'The first frame of iteration {j} of starting configuration {i} is not continuous with the last frame of the previous iteration. '  # noqa: E501
-                    err_str += f'Please check files {files_sorted[i][j - 1]} and {files_sorted[i][j]}.'
-                    raise ValueError(err_str)
-
-            t_last = t[-1]
-            val_last = traj[-1]
-
-            if j != 0:
-                traj = traj[:-1]  # remove the last frame, which is the same as the first of the next time series.
+            if j != n_iter - 1:
+                traj = traj[:-1]
 
             trajs[i].extend(traj)
 
@@ -200,7 +189,7 @@ def stitch_time_series_for_sim(files, dhdl=True, col_idx=-1, save=True):
     return trajs
 
 
-def stitch_trajs(gmx_executable, files, rep_trajs):
+def stitch_xtc_trajs(gmx_executable, files, rep_trajs):
     """
     Demuxes GROMACS trajectories from different replicas into individual continuous trajectories.
 
diff --git a/ensemble_md/tests/data/dhdl/simulation_example/sim_0/iteration_2/dhdl.xvg b/ensemble_md/tests/data/dhdl/simulation_example/sim_0/iteration_2/dhdl.xvg
new file mode 100644
index 00000000..60c72a2d
--- /dev/null
+++ b/ensemble_md/tests/data/dhdl/simulation_example/sim_0/iteration_2/dhdl.xvg
@@ -0,0 +1,53 @@
+# This file was created Mon Jun 19 04:08:44 2023
+# Created by:
+#          :-) GROMACS - gmx mdrun, 2022.5-dev-20230428-fdf57150ad (-:
+# 
+# Executable:   /jet/home/wehs7661/pkgs/gromacs/2022.5/bin/gmx
+# Data prefix:  /jet/home/wehs7661/pkgs/gromacs/2022.5
+# Working dir:  /ocean/projects/cts160011p/wehs7661/EEXE_experiments/Anthracene/EEXE/fixed_weight/Redo/test_7/sim_0/iteration_2
+# Command line:
+#   gmx mdrun -s sys_EE.tpr -nt 16 -ntmpi 1
+# gmx mdrun is part of G R O M A C S:
+#
+# GROup of MAchos and Cynical Suckers
+#
+@    title "dH/d\xl\f{} and \xD\f{}H"
+@    xaxis  label "Time (ps)"
+@    yaxis  label "dH/d\xl\f{} and \xD\f{}H (kJ/mol [\xl\f{}]\S-1\N)"
+@TYPE xy
+@ subtitle "T = 300 (K) "
+@ view 0.15, 0.15, 0.75, 0.85
+@ legend on
+@ legend box on
+@ legend loctype view
+@ legend 0.78, 0.8
+@ legend length 2
+@ s0 legend "Thermodynamic state"
+@ s1 legend "Total Energy (kJ/mol)"
+@ s2 legend "dH/d\xl\f{} vdw-lambda = 0.1800"
+@ s3 legend "\xD\f{}H \xl\f{} to 0.0000"
+@ s4 legend "\xD\f{}H \xl\f{} to 0.1800"
+@ s5 legend "\xD\f{}H \xl\f{} to 0.4200"
+@ s6 legend "\xD\f{}H \xl\f{} to 0.5700"
+@ s7 legend "\xD\f{}H \xl\f{} to 0.6800"
+8.0000    1 -34629.660 76.294617 -12.171330 0.0000000 20.012351 33.639191 43.923604
+8.2000    1 -34651.223 53.820854 -5.7918213 0.0000000 16.636073 29.299222 39.133190
+8.4000    1 -34551.805 76.754662 -12.062535 0.0000000 20.355419 34.357167 44.960779
+8.6000    0 -34470.441 60.429405 0.0000000 12.838254 33.812664 48.070257 58.823763
+8.8000    1 -34470.359 50.146221 -5.6069839 0.0000000 15.713153 27.967770 37.615367
+9.0000    1 -34498.391 39.090370 -2.5533874 0.0000000 13.934211 25.543530 34.808979
+9.2000    1 -34556.098 84.098511 -14.461316 0.0000000 20.903842 34.415566 44.420412
+9.4000    0 -34616.480 70.126610 0.0000000 13.909344 35.086697 49.137125 59.653158
+9.6000    1 -34534.547 26.037083 1.3580391 0.0000000 12.167194 23.478886 32.722436
+9.8000    2 -34312.969 71.234009 -19.139816 -14.436126 0.0000000 11.326751 20.238176
+10.0000    0 -34378.742 76.646988 0.0000000 14.550442 35.442341 48.924097 58.903971
+10.2000    2 -34285.172 84.664841 -27.865017 -18.636821 0.0000000 13.043783 22.901348
+10.4000    1 -34497.484 63.202911 -8.3794296 0.0000000 18.313194 31.802937 42.233846
+10.6000    1 -34578.695 87.972961 -15.083676 0.0000000 21.971679 36.270589 46.895210
+10.8000    0 -34541.055 57.429951 0.0000000 12.311475 32.571461 46.366842 56.775666
+11.0000    0 -34542.328 54.882725 0.0000000 12.163686 32.820439 47.037683 57.802180
+11.2000    1 -34556.328 73.172264 -10.645428 0.0000000 20.349487 34.931084 46.128175
+11.4000    0 -34620.914 66.442963 0.0000000 13.768872 35.857712 50.844045 62.154873
+11.6000    1 -34630.789 87.372787 -14.095708 0.0000000 22.653880 37.868951 49.277689
+11.8000    0 -34617.402 84.743134 0.0000000 16.467885 40.928452 56.989053 68.961985
+12.0000    1 -34608.320 -64.654449 36.570163 0.0000000 2.2114481 12.925400 22.739461
diff --git a/ensemble_md/tests/data/dhdl/simulation_example/sim_1/iteration_2/dhdl.xvg b/ensemble_md/tests/data/dhdl/simulation_example/sim_1/iteration_2/dhdl.xvg
new file mode 100644
index 00000000..30a9ff97
--- /dev/null
+++ b/ensemble_md/tests/data/dhdl/simulation_example/sim_1/iteration_2/dhdl.xvg
@@ -0,0 +1,53 @@
+# This file was created Mon Jun 19 04:08:44 2023
+# Created by:
+#          :-) GROMACS - gmx mdrun, 2022.5-dev-20230428-fdf57150ad (-:
+# 
+# Executable:   /jet/home/wehs7661/pkgs/gromacs/2022.5/bin/gmx
+# Data prefix:  /jet/home/wehs7661/pkgs/gromacs/2022.5
+# Working dir:  /ocean/projects/cts160011p/wehs7661/EEXE_experiments/Anthracene/EEXE/fixed_weight/Redo/test_7/sim_1/iteration_2
+# Command line:
+#   gmx mdrun -s sys_EE.tpr -nt 16 -ntmpi 1
+# gmx mdrun is part of G R O M A C S:
+#
+# GROup of MAchos and Cynical Suckers
+#
+@    title "dH/d\xl\f{} and \xD\f{}H"
+@    xaxis  label "Time (ps)"
+@    yaxis  label "dH/d\xl\f{} and \xD\f{}H (kJ/mol [\xl\f{}]\S-1\N)"
+@TYPE xy
+@ subtitle "T = 300 (K) "
+@ view 0.15, 0.15, 0.75, 0.85
+@ legend on
+@ legend box on
+@ legend loctype view
+@ legend 0.78, 0.8
+@ legend length 2
+@ s0 legend "Thermodynamic state"
+@ s1 legend "Total Energy (kJ/mol)"
+@ s2 legend "dH/d\xl\f{} vdw-lambda = 0.1800"
+@ s3 legend "\xD\f{}H \xl\f{} to 0.1800"
+@ s4 legend "\xD\f{}H \xl\f{} to 0.4200"
+@ s5 legend "\xD\f{}H \xl\f{} to 0.5700"
+@ s6 legend "\xD\f{}H \xl\f{} to 0.6800"
+@ s7 legend "\xD\f{}H \xl\f{} to 0.7600"
+8.0000    0 -34755.293 46.771725 0.0000000 15.872268 28.695901 38.829498 46.447546
+8.2000    0 -34785.953 74.836411 0.0000000 20.020610 33.922478 44.493968 52.312291
+8.4000    0 -34647.559 56.015610 0.0000000 17.202335 30.347924 40.604961 48.276854
+8.6000    0 -34641.148 -4.2277699 0.0000000 8.7699007 20.164078 29.975124 37.611529
+8.8000    1 -34616.250 69.080246 -10.322360 0.0000000 11.894403 22.082836 30.019714
+9.0000    1 -34614.609 42.226288 0.41087019 0.0000000 8.9144948 17.931109 25.390445
+9.2000    1 -34468.852 47.732052 -2.6941484 0.0000000 9.2777303 18.100653 25.243219
+9.4000    0 -34504.332 55.423943 0.0000000 16.886680 29.825406 39.961572 47.562781
+9.6000    1 -34515.789 59.769459 -6.4545549 0.0000000 10.655896 19.998678 27.305587
+9.8000    1 -34515.922 -3.8737392 21.757142 0.0000000 3.6053060 9.9567376 15.796676
+10.0000    2 -34563.215 -34.931252 159.09013 15.563947 0.0000000 -0.40763683 2.4860333
+10.2000    3 -34533.957 -2.8320389 295.73599 31.136484 4.1779768 0.0000000 1.2499733
+10.4000    3 -34558.859 -2.1180019 280.30180 30.889294 4.1170456 0.0000000 1.3188639
+10.6000    4 -34530.309 -30.518177 751.66180 96.462129 24.109847 5.0418812 0.0000000
+10.8000    4 -34537.238 -71.261162 930.65434 133.39602 36.839697 9.0098523 0.0000000
+11.0000    4 -34461.461 -87.927658 1239.1910 157.80551 43.247558 10.757349 0.0000000
+11.2000    4 -34466.941 -82.624626 889.63010 142.10997 40.461601 10.151953 0.0000000
+11.4000    4 -34466.344 -123.39691 1340.7995 193.15680 55.609261 14.434419 0.0000000
+11.6000    4 -34515.422 -127.27545 1497.2377 203.23258 57.821370 14.932713 0.0000000
+11.8000    4 -34671.367 -175.34641 1967.8005 259.49930 74.645520 19.813017 0.0000000
+12.0000    4 -34669.914 -206.61479 2211.1280 291.95570 84.940968 22.912667 0.0000000
diff --git a/ensemble_md/tests/data/dhdl/simulation_example/sim_2/iteration_2/dhdl.xvg b/ensemble_md/tests/data/dhdl/simulation_example/sim_2/iteration_2/dhdl.xvg
new file mode 100644
index 00000000..c501ad02
--- /dev/null
+++ b/ensemble_md/tests/data/dhdl/simulation_example/sim_2/iteration_2/dhdl.xvg
@@ -0,0 +1,53 @@
+# This file was created Mon Jun 19 04:08:44 2023
+# Created by:
+#          :-) GROMACS - gmx mdrun, 2022.5-dev-20230428-fdf57150ad (-:
+# 
+# Executable:   /jet/home/wehs7661/pkgs/gromacs/2022.5/bin/gmx
+# Data prefix:  /jet/home/wehs7661/pkgs/gromacs/2022.5
+# Working dir:  /ocean/projects/cts160011p/wehs7661/EEXE_experiments/Anthracene/EEXE/fixed_weight/Redo/test_7/sim_2/iteration_2
+# Command line:
+#   gmx mdrun -s sys_EE.tpr -nt 16 -ntmpi 1
+# gmx mdrun is part of G R O M A C S:
+#
+# GROup of MAchos and Cynical Suckers
+#
+@    title "dH/d\xl\f{} and \xD\f{}H"
+@    xaxis  label "Time (ps)"
+@    yaxis  label "dH/d\xl\f{} and \xD\f{}H (kJ/mol [\xl\f{}]\S-1\N)"
+@TYPE xy
+@ subtitle "T = 300 (K) "
+@ view 0.15, 0.15, 0.75, 0.85
+@ legend on
+@ legend box on
+@ legend loctype view
+@ legend 0.78, 0.8
+@ legend length 2
+@ s0 legend "Thermodynamic state"
+@ s1 legend "Total Energy (kJ/mol)"
+@ s2 legend "dH/d\xl\f{} vdw-lambda = 0.5700"
+@ s3 legend "\xD\f{}H \xl\f{} to 0.4200"
+@ s4 legend "\xD\f{}H \xl\f{} to 0.5700"
+@ s5 legend "\xD\f{}H \xl\f{} to 0.6800"
+@ s6 legend "\xD\f{}H \xl\f{} to 0.7600"
+@ s7 legend "\xD\f{}H \xl\f{} to 0.8600"
+8.0000    1 -34313.500 53.922421 -4.9744428 0.0000000 7.0177010 13.155697 21.633762
+8.2000    1 -34317.035 54.985249 -5.1219533 0.0000000 7.1659649 13.451778 22.153823
+8.4000    0 -34343.859 64.260559 0.0000000 11.269674 20.979761 28.515765 38.314333
+8.6000    0 -34327.445 42.952110 0.0000000 8.9256910 17.692659 24.808122 34.303222
+8.8000    0 -34193.602 30.741173 0.0000000 7.3090871 15.119509 21.632564 30.461708
+9.0000    1 -34165.562 17.672602 3.5807292 0.0000000 4.0475147 8.9581755 16.590528
+9.2000    2 -34196.078 53.809811 -1.9005395 -4.2808140 0.0000000 4.9581900 12.597696
+9.4000    1 -34121.391 30.466057 -0.028901401 0.0000000 4.9883106 10.199266 17.972795
+9.6000    2 -34181.188 12.930644 22.614589 1.8607628 0.0000000 2.2793522 7.7807644
+9.8000    2 -34159.910 -83.369080 82.336740 16.540114 0.0000000 -3.9512120 -3.2089027
+10.0000    3 -34253.008 -79.733406 147.43616 40.507554 9.9642161 0.0000000 -3.8256816
+10.2000    3 -34316.266 -100.41053 169.57082 47.693830 12.082289 0.0000000 -5.3537159
+10.4000    3 -34287.016 -38.847527 117.40750 29.041785 6.1480040 0.0000000 -0.39827370
+10.6000    3 -34357.711 -7.0617037 73.913992 16.967344 2.8104849 0.0000000 1.9357874
+10.8000    2 -34354.020 34.890873 8.7370223 -1.3708427 0.0000000 3.7911577 10.761747
+11.0000    1 -34391.375 -18.832100 11.990726 0.0000000 1.0368603 4.7231291 11.600601
+11.2000    2 -34355.613 53.997337 -1.6474591 -4.2284115 0.0000000 5.0100889 12.806016
+11.4000    1 -34464.656 -3.4788733 8.4742171 0.0000000 2.2977398 6.4826487 13.639728
+11.6000    1 -34486.930 -16.786358 10.481138 0.0000000 1.0813492 4.7300642 11.606537
+11.8000    2 -34525.945 -17.548145 39.956987 6.5848033 0.0000000 0.41567518 4.8993547
+12.0000    2 -34481.375 -113.31747 102.25158 21.396274 0.0000000 -5.7530722 -6.0001237
diff --git a/ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_2/.dhdl.xvg.swp b/ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_2/.dhdl.xvg.swp
new file mode 100644
index 0000000000000000000000000000000000000000..faf6c02c843d38c38d2c8fd5816f571958d0b5de
GIT binary patch
literal 12288
zcmeHNOOGR06|MoodqPCkz@cX|gHqS`{b(^GCOxeTAx)y`4k2QYRc+sHSK4+NSM{_T
z1c(K@gaxem4`joJ4g7*E*t3X;m_LBRlJEM_wqe4sgs4h=?CZMce)pX3JjyNKWB$Ew
zACPA<!}VO2z4QC$PwxEjm(Tp}FIo0REc2r}-mlNo<VRlzMeoI`I?G?1R=H=xs#gr3
zsQj}vVCP=dJ0HbyQ+Hk|Phz(mclTcJzF3_P<FxZ2*0GrO2I=nMpqw^!_m$$f6UTKN
z9S>u7)ZZF6u?9@PXo^mM&>wc{a&$f{nz9;q<E$7>hB4jhs!rL&>EgbeYqHs+(<d6*
z-fS88STb-uQ&RBc+KrvB)AL_`{bT86>r~4?%RtLO%RtLO%RtLO%RtLO%fOQvXeQUP
zpJVD1%W1QGzxFTh%a83_%RtLO%RtLO%RtLO%RtLO%RtLO%RtLO%RtM({~-fKKg)jm
zN&N2rDHxC6|5yM2zxSmq`xE5HkPC>1T!Z}L*)01JWFPW1$X6gAUeB_>L*9eD3ps-v
zL+(Sq0r@KAS;#-Wm}MV8{tS5+@(!d6v5@PKzkVUh{s8$k<Tm7QpU<-2KyE`Ge-2~F
z9mwO)Vhp(hdHfmZL%s?5;L}<5D~N}@|4f#>4e^lo;al?W?c`&7wG6Zj{BJTKB;&XU
zm!@24r#N%QQ!XqE-f6>0c*TtNL6GN6T5eS!6_|BKTR{NON(X0&&yfI@3BmjlR4qUQ
z6AD9N4C7XtK*DIItoNQQXI@~SNNOgzaMrA$umrHy1?3Z}-7*#Cc&iBarFR_51k4KB
zD@VpN?SKSO2eYK&*eGc$@5LHEVd^bQHD|_3Y%3JA${P&_VZ7wdkq%5s>$w9Z*hfk2
z*HDxj=A{$d*n}!KQ9%Z?GEi=@&T}CR1<j0tk|m{C;5H;w$-Kc<auXF!TcK452^FC8
zP|aBl$}+`)>KxdW4jTl|ypqr-%z1CEwjhPMz=PgGl|Xgy5v+p5B`VxTBstKGO|c4q
zP>73lOgR-KsCFy_7Xsh35S$0)yhYMr=!7;3#=_$ztVpolr#T0fQkp1<6ieJ|<HVL>
zf;lCXaw!K8O2^?30SpLWE5WYfwSd)<1)1XIm{g7nfiT-(;QMB5uu36Ez%FS-NojmU
z3Nnqv&@Q2_;x*^M3q;3p$|X+e329LTQb7g9VG@AAq}xa%*ebjh;M0*~pr0d;6Y45n
z3sl&uILt;CC?yQA5G5Oe*BZwG@hXtAo6I5~f^}RA!b%I$g3b_>&2V{_iW9tAgD`+C
zM+zB_jFKFnPCIT;ZdbJlpHg68swBk(*C7ZC&{eROIe?r&o#Y1LK$gQQl}b8*05~YE
zLl4lKpn@3*L$v}u!6+*{2k0tP3-=xJcV1d5c_ZPpao|$RRKtczv=O-lI-m6>Alu;_
zz_sx}U4?311RjAGDfKMK;-DT~Dlmi0hX2?I=_A~{M+7g>aJG#`B?h6B|EoyNL0v#C
z!@k@bbOR+kYLi>Ah2l=FFQ5r1bcWQ-B(6<R<O$k_bUH&wmyudn8d6t4q?`u10QIR{
zQ)PlRB!YwS0aby%qpb;BO;9r7g5^>IbSqMxgSbSHK!DIwrAVz0HR#d$5~%>%&Z(52
z&I#ayRE;7R)&X@Ds)ea2eTi~U4*_G)0?<%3!x~g>>U{!jN8%ns!C5WThW~IMrg)Pu
zw*!Sfk6g#Dm@vR{<SoM!M22~GrQ}ETqA4hY*0hP4r$>?q4x9sZJ5pXV6x+N9ax^A^
z#}x;Is78bzmRgHcBh<6t!uYL7&8ty3EwgV??PwTJ<G4>d4`=rtp8Vve_vztmm<H5T
zwCKD$NzwjC#q>nte6DDoqUctN`YDP?wgsYkiXytwLQ!5R_V0K4D?c9fFY;kAI_?*A
z8xgxQR9tBvRZTIZ{c)V0Uee98mph|scx&zqU+WCwbX4^($Hl1ZQQZ_xOd3n}hH-q_
z3@A~$yo1>J7t(g?TR5)VTuvfAD&xia?r}AvYP|kDA14(o7A8R~Ufq1`4K(ZX<ECte
zk#>&YlESm`=67xp%sG1VwSAghLIu}lF)M4LVR0OXw6n=bfp}kqd+9IzV9D1HA0Fo7
z;dkf$17)^_a#W5Y-JH~`*`*mD1-_Hr$ZpU}4_-Z=P<28FcYA}XrnLRu<+#M*Z8|*f
zoyBQA*E=1}Xw;vcj|p~9ifNP9yhIP^RXU)%v`2@#KqFe~<u%opb$5TCHIpgj<AwyD
z;NxaADM>(guNsZgZo_gM)7m$x=~+2GrG7cxCF)eYSd2T9Y4uj@;aoRFAUTDYbS~nc
zcK8C)+28;EKF)Se;uM~zGvs&2&0t#e;yBLdIw$2U_PZCcJRLMRPTsFN>5zEW{rf?1
z?0*j@$7wQ+Sd{jED>j`$HHz!)CTFL0=X6?)ieBBpHzLUlmhI1C@4P8+ARVjz%VnM8
za@+w^SdPD|Yd61@;&E_y?~w8}lP}O>v%DW4<rw1Yf%N%F{{-Jt3@`e0Gf({OO(XKY
oPsf+()6oENIw_Hd7e!6I<=jCvrk4>zfv2zD<$f0rl!Tc56YPf*uK)l5

literal 0
HcmV?d00001

diff --git a/ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_2/dhdl.xvg b/ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_2/dhdl.xvg
new file mode 100644
index 00000000..9480c08a
--- /dev/null
+++ b/ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_2/dhdl.xvg
@@ -0,0 +1,53 @@
+# This file was created Mon Jun 19 04:08:44 2023
+# Created by:
+#          :-) GROMACS - gmx mdrun, 2022.5-dev-20230428-fdf57150ad (-:
+# 
+# Executable:   /jet/home/wehs7661/pkgs/gromacs/2022.5/bin/gmx
+# Data prefix:  /jet/home/wehs7661/pkgs/gromacs/2022.5
+# Working dir:  /ocean/projects/cts160011p/wehs7661/EEXE_experiments/Anthracene/EEXE/fixed_weight/Redo/test_7/sim_3/iteration_2
+# Command line:
+#   gmx mdrun -s sys_EE.tpr -nt 16 -ntmpi 1
+# gmx mdrun is part of G R O M A C S:
+#
+# GROup of MAchos and Cynical Suckers
+#
+@    title "dH/d\xl\f{} and \xD\f{}H"
+@    xaxis  label "Time (ps)"
+@    yaxis  label "dH/d\xl\f{} and \xD\f{}H (kJ/mol [\xl\f{}]\S-1\N)"
+@TYPE xy
+@ subtitle "T = 300 (K) "
+@ view 0.15, 0.15, 0.75, 0.85
+@ legend on
+@ legend box on
+@ legend loctype view
+@ legend 0.78, 0.8
+@ legend length 2
+@ s0 legend "Thermodynamic state"
+@ s1 legend "Total Energy (kJ/mol)"
+@ s2 legend "dH/d\xl\f{} vdw-lambda = 0.8600"
+@ s3 legend "\xD\f{}H \xl\f{} to 0.5700"
+@ s4 legend "\xD\f{}H \xl\f{} to 0.6800"
+@ s5 legend "\xD\f{}H \xl\f{} to 0.7600"
+@ s6 legend "\xD\f{}H \xl\f{} to 0.8600"
+@ s7 legend "\xD\f{}H \xl\f{} to 1.0000"
+8.0000    3 -34784.020 -77.079910 103.68410 38.166362 14.101582 0.0000000 -2.3011742
+8.2000    3 -34860.699 -58.251957 86.461296 31.417711 11.356713 0.0000000 -0.75197034
+8.4000    3 -34859.223 -16.553837 54.195003 18.130927 5.7016413 0.0000000 3.2131857
+8.6000    2 -34805.293 -131.15421 58.714853 15.259576 0.0000000 -7.6177195 -5.4159167
+8.8000    3 -34891.207 -74.783188 98.909443 36.761498 13.646595 0.0000000 -2.1796022
+9.0000    3 -34795.844 -85.563187 106.10056 39.968034 15.062222 0.0000000 -3.2385274
+9.2000    3 -34817.168 -62.806141 89.513820 32.832173 11.981368 0.0000000 -1.1259146
+9.4000    2 -34871.062 -132.36244 57.814263 15.243424 0.0000000 -7.8148593 -5.9443458
+9.6000    2 -34903.781 -51.724876 31.809730 7.2676394 0.0000000 -1.5270443 4.9536180
+9.8000    2 -34873.547 -65.990837 37.090620 8.7470559 0.0000000 -2.6001132 3.1049916
+10.0000    2 -34842.301 -68.331505 36.699844 8.8441776 0.0000000 -2.8967041 2.3632594
+10.2000    2 -34814.570 -28.212650 23.207305 4.8157071 0.0000000 0.18411137 7.6140201
+10.4000    1 -34842.922 -17.237011 6.6958868 0.0000000 0.43287851 4.8234432 15.423047
+10.6000    1 -34889.977 -50.003258 11.445712 0.0000000 -1.7323375 0.94870832 10.348200
+10.8000    2 -34907.156 -159.88383 67.373923 17.997990 0.0000000 -9.9937868 -9.9714539
+11.0000    3 -34936.309 -77.132729 98.832614 36.886646 13.790942 0.0000000 -2.6671138
+11.2000    3 -34800.629 -68.980125 94.532006 34.906907 12.851202 0.0000000 -1.6614399
+11.4000    3 -34740.562 -113.63854 129.12077 49.226807 18.943248 0.0000000 -5.8271056
+11.6000    4 -34780.863 11.287065 139.94837 58.477653 27.002308 6.8633881 0.0000000
+11.8000    3 -34739.133 -155.06459 154.36888 60.818883 24.201765 0.0000000 -10.067938
+12.0000    4 -34730.758 -16.987450 183.54891 80.260639 39.675461 12.434976 0.0000000
diff --git a/ensemble_md/tests/test_analyze_traj.py b/ensemble_md/tests/test_analyze_traj.py
index d6bab343..1558f02a 100644
--- a/ensemble_md/tests/test_analyze_traj.py
+++ b/ensemble_md/tests/test_analyze_traj.py
@@ -58,6 +58,60 @@ def test_extract_state_traj():
 
 
 def test_stitch_time_series():
+    folder = os.path.join(input_path, 'dhdl/simulation_example')
+    files = [[f'{folder}/sim_{i}/iteration_{j}/dhdl.xvg' for j in range(3)] for i in range(4)]
+    rep_trajs = np.array([[0, 0, 1], [1, 1, 0], [2, 2, 2], [3, 3, 3]])
+    shifts = [0, 1, 2, 3]
+
+    trajs = analyze_traj.stitch_time_series(files, rep_trajs, shifts)
+    assert trajs[0] == [
+        0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,
+        1, 1, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 1, 0, 1, 1,
+        1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 3, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5
+    ]
+    assert trajs[1] == [
+        1, 1, 2, 3, 3, 3, 2, 2, 1, 1, 1, 1, 1, 2, 3, 2, 1, 1, 1, 1,
+        2, 2, 1, 1, 1, 1, 1, 2, 3, 2, 1, 1, 1, 1, 2, 3, 3, 3, 2, 2,
+        1, 1, 1, 0, 1, 1, 1, 0, 1, 2, 0, 2, 1, 1, 0, 0, 1, 0, 1, 0, 1
+    ]
+    assert trajs[2] == [
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 2, 2, 2, 2, 3, 3,
+        3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 3, 3, 3, 2, 2, 3, 4, 3, 3, 2,
+        3, 3, 2, 2, 2, 3, 4, 3, 4, 4, 5, 5, 5, 5, 4, 3, 4, 3, 3, 4, 4
+    ]
+    assert trajs[3] == [
+        3, 3, 3, 3, 3, 3, 3, 5, 4, 4, 5, 4, 4, 5, 4, 5, 5, 5, 4, 5,
+        4, 4, 5, 4, 5, 5, 4, 5, 5, 5, 4, 5, 5, 4, 5, 4, 5, 4, 5, 5,
+        6, 6, 6, 5, 6, 6, 6, 5, 5, 5, 5, 5, 4, 4, 5, 6, 6, 6, 7, 6, 7
+    ]
+
+    assert os.path.exists('state_trajs.npy')
+    os.remove('state_trajs.npy')
+
+
+def test_stitch_time_series_for_sim():
+    # Set up files for testing
+    for sim in range(2):
+        for iteration in range(2):
+            target_dir = f'ensemble_md/tests/data/stitch_test/sim_{sim}/iteration_{iteration}'
+            os.makedirs(target_dir)
+            shutil.copy(f'ensemble_md/tests/data/dhdl/dhdl_{sim * 2 + iteration}.xvg', f'{target_dir}/dhdl.xvg')
+            save_and_exclude(f'{target_dir}/dhdl.xvg', 40)  # just keep the first 10 frames
+
+    # files = [[f'ensemble_md/tests/data/stitch_test/sim_{i}/iteration_{j}/dhdl_short.xvg' for j in range(2)] for i in range(2)]  # noqa: E501
+    # shifts = [1, 1]
+
+    # More to come ...
+    # trajs_test = analyze_traj.stitch_time_series_for_sim(files, shifts, save=True)
+    # trajs_expected = [
+    #     [0, 0, 3, 1, 4, 4, 5, 4, 5, 5, 4]
+    # ]
+
+    # Clean up
+    shutil.rmtree('ensemble_md/tests/data/stitch_test')
+
+
+def test_stitch_xtc_trajs():
     pass
 
 
@@ -115,32 +169,6 @@ def test_convert_npy2xvg():
     os.chdir('../../../')
 
 
-def test_stitch_time_series_for_sim():
-    # Set up files for testing
-    for sim in range(2):
-        for iteration in range(2):
-            target_dir = f'ensemble_md/tests/data/stitch_test/sim_{sim}/iteration_{iteration}'
-            os.makedirs(target_dir)
-            shutil.copy(f'ensemble_md/tests/data/dhdl/dhdl_{sim * 2 + iteration}.xvg', f'{target_dir}/dhdl.xvg')
-            save_and_exclude(f'{target_dir}/dhdl.xvg', 40)  # just keep the first 10 frames
-
-    # files = [[f'ensemble_md/tests/data/stitch_test/sim_{i}/iteration_{j}/dhdl_short.xvg' for j in range(2)] for i in range(2)]  # noqa: E501
-    # shifts = [1, 1]
-
-    # More to come ...
-    # trajs_test = analyze_traj.stitch_time_series_for_sim(files, shifts, save=True)
-    # trajs_expected = [
-    #     [0, 0, 3, 1, 4, 4, 5, 4, 5, 5, 4]
-    # ]
-
-    # Clean up
-    shutil.rmtree('ensemble_md/tests/data/stitch_test')
-
-
-def test_stitch_trajs():
-    pass
-
-
 def test_traj2transmtx():
     traj = [0, 1, 2, 1, 0, 3]
     N = 4  # matrix size

From 83fed4443a7f014d08ee5d568e929ca38234e66a Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 9 Apr 2024 04:04:29 +0800
Subject: [PATCH 36/41] Tweaked stitch_time_series_for_sim; Added unit tests
 for stitch_time_series_for_sim and stitch_xtc_trajs

---
 ensemble_md/analysis/analyze_traj.py          |  22 ++--
 .../sim_3/iteration_2/.dhdl.xvg.swp           | Bin 12288 -> 0 bytes
 ensemble_md/tests/test_analyze_traj.py        | 119 +++++++++++++++---
 3 files changed, 111 insertions(+), 30 deletions(-)
 delete mode 100644 ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_2/.dhdl.xvg.swp

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index e123895a..7db40204 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -100,13 +100,12 @@ def stitch_time_series(files, rep_trajs, shifts=None, dhdl=True, col_idx=-1, sav
         for j in range(n_iter):
             if dhdl:
                 traj, _ = extract_state_traj(files_sorted[i][j])
+                # Shift the indices so that global indices are used.
+                shift_idx = rep_trajs[i][j]
+                traj = list(np.array(traj) + shifts[shift_idx])
             else:
                 traj = np.loadtxt(files_sorted[i][j], comments=['#', '@'])[:, col_idx]
-
-            # Shift the indices so that global indices are used.
-            shift_idx = rep_trajs[i][j]
-            traj = list(np.array(traj) + shifts[shift_idx])
-
+            
             if j != n_iter - 1:
                 traj = traj[:-1]
 
@@ -121,7 +120,7 @@ def stitch_time_series(files, rep_trajs, shifts=None, dhdl=True, col_idx=-1, sav
     return trajs
 
 
-def stitch_time_series_for_sim(files, dhdl=True, col_idx=-1, save=True):
+def stitch_time_series_for_sim(files, shifts, dhdl=True, col_idx=-1, save=True):
     """
     Stitches the state-space/CV-space time series in the same replica/simulation folder.
     That is, the output time series is contributed by multiple different trajectories (initiated by
@@ -133,6 +132,9 @@ def stitch_time_series_for_sim(files, dhdl=True, col_idx=-1, save=True):
         A list of lists of file names of GROMACS DHDL files or general GROMACS XVG files
         or PLUMED output files. Specifically, :code:`files[i]` should be a list containing
         the files of interest from all iterations in replica :code:`i`. The files should be sorted naturally.
+    shifts : list
+        A list of values for shifting the state indices for each replica. The length of the list
+        should be equal to the number of replicas. This is only needed when :code:`dhdl=True`.
     dhdl : bool
         Whether the input files are GROMACS dhdl files, in which case trajectories of global alchemical indices
         will be generated. If :code:`dhdl=False`, the input files must be readable by `numpy.loadtxt` assuming that
@@ -164,9 +166,6 @@ def stitch_time_series_for_sim(files, dhdl=True, col_idx=-1, save=True):
                 traj = np.loadtxt(files[i][j], comments=['#', '@'])[:, col_idx]
                 t = np.loadtxt(files[i][j], comments=['#', '@'])[:, 0]
 
-            # Note that there is no need to shift the indices for the same replica, which same the same set of states
-            # traj = list(np.array(traj) + shifts[i])
-
             if j != 0:
                 # Check the continuity of the trajectory
                 if traj[0] != val_last or t[0] != t_last:
@@ -177,10 +176,13 @@ def stitch_time_series_for_sim(files, dhdl=True, col_idx=-1, save=True):
             t_last = t[-1]
             val_last = traj[-1]
 
-            if j != 0:
+            if j != n_iter - 1:
                 traj = traj[:-1]  # remove the last frame, which is the same as the first of the next time series.
 
             trajs[i].extend(traj)
+        
+        # All segments for the same replica should have the same shift
+        trajs[i] = list(np.array(trajs[i]) + shifts[i])
 
     # Save the trajectories as an NPY file if desired
     if save is True:
diff --git a/ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_2/.dhdl.xvg.swp b/ensemble_md/tests/data/dhdl/simulation_example/sim_3/iteration_2/.dhdl.xvg.swp
deleted file mode 100644
index faf6c02c843d38c38d2c8fd5816f571958d0b5de..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 12288
zcmeHNOOGR06|MoodqPCkz@cX|gHqS`{b(^GCOxeTAx)y`4k2QYRc+sHSK4+NSM{_T
z1c(K@gaxem4`joJ4g7*E*t3X;m_LBRlJEM_wqe4sgs4h=?CZMce)pX3JjyNKWB$Ew
zACPA<!}VO2z4QC$PwxEjm(Tp}FIo0REc2r}-mlNo<VRlzMeoI`I?G?1R=H=xs#gr3
zsQj}vVCP=dJ0HbyQ+Hk|Phz(mclTcJzF3_P<FxZ2*0GrO2I=nMpqw^!_m$$f6UTKN
z9S>u7)ZZF6u?9@PXo^mM&>wc{a&$f{nz9;q<E$7>hB4jhs!rL&>EgbeYqHs+(<d6*
z-fS88STb-uQ&RBc+KrvB)AL_`{bT86>r~4?%RtLO%RtLO%RtLO%RtLO%fOQvXeQUP
zpJVD1%W1QGzxFTh%a83_%RtLO%RtLO%RtLO%RtLO%RtLO%RtLO%RtM({~-fKKg)jm
zN&N2rDHxC6|5yM2zxSmq`xE5HkPC>1T!Z}L*)01JWFPW1$X6gAUeB_>L*9eD3ps-v
zL+(Sq0r@KAS;#-Wm}MV8{tS5+@(!d6v5@PKzkVUh{s8$k<Tm7QpU<-2KyE`Ge-2~F
z9mwO)Vhp(hdHfmZL%s?5;L}<5D~N}@|4f#>4e^lo;al?W?c`&7wG6Zj{BJTKB;&XU
zm!@24r#N%QQ!XqE-f6>0c*TtNL6GN6T5eS!6_|BKTR{NON(X0&&yfI@3BmjlR4qUQ
z6AD9N4C7XtK*DIItoNQQXI@~SNNOgzaMrA$umrHy1?3Z}-7*#Cc&iBarFR_51k4KB
zD@VpN?SKSO2eYK&*eGc$@5LHEVd^bQHD|_3Y%3JA${P&_VZ7wdkq%5s>$w9Z*hfk2
z*HDxj=A{$d*n}!KQ9%Z?GEi=@&T}CR1<j0tk|m{C;5H;w$-Kc<auXF!TcK452^FC8
zP|aBl$}+`)>KxdW4jTl|ypqr-%z1CEwjhPMz=PgGl|Xgy5v+p5B`VxTBstKGO|c4q
zP>73lOgR-KsCFy_7Xsh35S$0)yhYMr=!7;3#=_$ztVpolr#T0fQkp1<6ieJ|<HVL>
zf;lCXaw!K8O2^?30SpLWE5WYfwSd)<1)1XIm{g7nfiT-(;QMB5uu36Ez%FS-NojmU
z3Nnqv&@Q2_;x*^M3q;3p$|X+e329LTQb7g9VG@AAq}xa%*ebjh;M0*~pr0d;6Y45n
z3sl&uILt;CC?yQA5G5Oe*BZwG@hXtAo6I5~f^}RA!b%I$g3b_>&2V{_iW9tAgD`+C
zM+zB_jFKFnPCIT;ZdbJlpHg68swBk(*C7ZC&{eROIe?r&o#Y1LK$gQQl}b8*05~YE
zLl4lKpn@3*L$v}u!6+*{2k0tP3-=xJcV1d5c_ZPpao|$RRKtczv=O-lI-m6>Alu;_
zz_sx}U4?311RjAGDfKMK;-DT~Dlmi0hX2?I=_A~{M+7g>aJG#`B?h6B|EoyNL0v#C
z!@k@bbOR+kYLi>Ah2l=FFQ5r1bcWQ-B(6<R<O$k_bUH&wmyudn8d6t4q?`u10QIR{
zQ)PlRB!YwS0aby%qpb;BO;9r7g5^>IbSqMxgSbSHK!DIwrAVz0HR#d$5~%>%&Z(52
z&I#ayRE;7R)&X@Ds)ea2eTi~U4*_G)0?<%3!x~g>>U{!jN8%ns!C5WThW~IMrg)Pu
zw*!Sfk6g#Dm@vR{<SoM!M22~GrQ}ETqA4hY*0hP4r$>?q4x9sZJ5pXV6x+N9ax^A^
z#}x;Is78bzmRgHcBh<6t!uYL7&8ty3EwgV??PwTJ<G4>d4`=rtp8Vve_vztmm<H5T
zwCKD$NzwjC#q>nte6DDoqUctN`YDP?wgsYkiXytwLQ!5R_V0K4D?c9fFY;kAI_?*A
z8xgxQR9tBvRZTIZ{c)V0Uee98mph|scx&zqU+WCwbX4^($Hl1ZQQZ_xOd3n}hH-q_
z3@A~$yo1>J7t(g?TR5)VTuvfAD&xia?r}AvYP|kDA14(o7A8R~Ufq1`4K(ZX<ECte
zk#>&YlESm`=67xp%sG1VwSAghLIu}lF)M4LVR0OXw6n=bfp}kqd+9IzV9D1HA0Fo7
z;dkf$17)^_a#W5Y-JH~`*`*mD1-_Hr$ZpU}4_-Z=P<28FcYA}XrnLRu<+#M*Z8|*f
zoyBQA*E=1}Xw;vcj|p~9ifNP9yhIP^RXU)%v`2@#KqFe~<u%opb$5TCHIpgj<AwyD
z;NxaADM>(guNsZgZo_gM)7m$x=~+2GrG7cxCF)eYSd2T9Y4uj@;aoRFAUTDYbS~nc
zcK8C)+28;EKF)Se;uM~zGvs&2&0t#e;yBLdIw$2U_PZCcJRLMRPTsFN>5zEW{rf?1
z?0*j@$7wQ+Sd{jED>j`$HHz!)CTFL0=X6?)ieBBpHzLUlmhI1C@4P8+ARVjz%VnM8
za@+w^SdPD|Yd61@;&E_y?~w8}lP}O>v%DW4<rw1Yf%N%F{{-Jt3@`e0Gf({OO(XKY
oPsf+()6oENIw_Hd7e!6I<=jCvrk4>zfv2zD<$f0rl!Tc56YPf*uK)l5

diff --git a/ensemble_md/tests/test_analyze_traj.py b/ensemble_md/tests/test_analyze_traj.py
index 1558f02a..d29d040e 100644
--- a/ensemble_md/tests/test_analyze_traj.py
+++ b/ensemble_md/tests/test_analyze_traj.py
@@ -11,6 +11,7 @@
 Unit tests for the module analyze_traj.py.
 """
 import os
+import pytest
 import shutil
 import numpy as np
 from unittest.mock import patch, MagicMock
@@ -63,6 +64,7 @@ def test_stitch_time_series():
     rep_trajs = np.array([[0, 0, 1], [1, 1, 0], [2, 2, 2], [3, 3, 3]])
     shifts = [0, 1, 2, 3]
 
+    # Test 1
     trajs = analyze_traj.stitch_time_series(files, rep_trajs, shifts)
     assert trajs[0] == [
         0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,
@@ -88,31 +90,108 @@ def test_stitch_time_series():
     assert os.path.exists('state_trajs.npy')
     os.remove('state_trajs.npy')
 
+    # Test 2: Treat the dhdl files as other types of xvg files
+    # Here the time series will be read as is and no shifting is done.
+    trajs = analyze_traj.stitch_time_series(files, rep_trajs, dhdl=False, col_idx=1)
+
+    assert trajs[0] == [
+        0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,
+        1, 1, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 1, 0, 1, 1,
+        0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4
+    ]
+    assert trajs[1] == [
+        0, 0, 1, 2, 2, 2, 1, 1, 0, 0, 0, 0, 0, 1, 2, 1, 0, 0, 0, 0,
+        1, 1, 0, 0, 0, 0, 0, 1, 2, 1, 0, 0, 0, 0, 1, 2, 2, 2, 1, 1,
+        1, 1, 1, 0, 1, 1, 1, 0, 1, 2, 0, 2, 1, 1, 0, 0, 1, 0, 1, 0, 1
+    ]
+    assert trajs[2] == [
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 2, 1, 1, 0,
+        1, 1, 0, 0, 0, 1, 2, 1, 2, 2, 3, 3, 3, 3, 2, 1, 2, 1, 1, 2, 2
+    ]
+    assert trajs[3] == [
+        0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 2,
+        1, 1, 2, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2,
+        3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 2, 3, 3, 3, 4, 3, 4
+    ]
+
+    assert os.path.exists('cv_trajs.npy')
+    os.remove('cv_trajs.npy')
+
 
 def test_stitch_time_series_for_sim():
-    # Set up files for testing
-    for sim in range(2):
-        for iteration in range(2):
-            target_dir = f'ensemble_md/tests/data/stitch_test/sim_{sim}/iteration_{iteration}'
-            os.makedirs(target_dir)
-            shutil.copy(f'ensemble_md/tests/data/dhdl/dhdl_{sim * 2 + iteration}.xvg', f'{target_dir}/dhdl.xvg')
-            save_and_exclude(f'{target_dir}/dhdl.xvg', 40)  # just keep the first 10 frames
-
-    # files = [[f'ensemble_md/tests/data/stitch_test/sim_{i}/iteration_{j}/dhdl_short.xvg' for j in range(2)] for i in range(2)]  # noqa: E501
-    # shifts = [1, 1]
-
-    # More to come ...
-    # trajs_test = analyze_traj.stitch_time_series_for_sim(files, shifts, save=True)
-    # trajs_expected = [
-    #     [0, 0, 3, 1, 4, 4, 5, 4, 5, 5, 4]
-    # ]
+    folder = os.path.join(input_path, 'dhdl/simulation_example')
+    files = [[f'{folder}/sim_{i}/iteration_{j}/dhdl.xvg' for j in range(3)] for i in range(4)]
+    shifts = [0, 1, 2, 3]
 
-    # Clean up
-    shutil.rmtree('ensemble_md/tests/data/stitch_test')
+    # Test 1
+    trajs = analyze_traj.stitch_time_series_for_sim(files, shifts)
 
+    trajs[0] == [
+        0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 
+        1, 1, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 1, 0, 1, 1,
+        1, 1, 1, 0, 1, 1, 1, 0, 1, 2, 0, 2, 1, 1, 0, 0, 1, 0, 1, 0, 1
+    ]
 
-def test_stitch_xtc_trajs():
-    pass
+    trajs[1] == [
+        1, 1, 2, 3, 3, 3, 2, 2, 1, 1, 1, 1, 1, 2, 3, 2, 1, 1, 1, 1,
+        2, 2, 1, 1, 1, 1, 1, 2, 3, 2, 1, 1, 1, 1, 2, 3, 3, 3, 2, 2,
+        1, 1, 1, 0, 1, 1, 1, 0, 1, 2, 0, 2, 1, 1, 0, 0, 1, 0, 1, 0, 1
+    ]
+
+    trajs[2] == [
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 2, 2, 2, 2, 3, 3,
+        3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 3, 3, 3, 2, 2, 3, 4, 3, 3, 2,
+        3, 3, 2, 2, 2, 3, 4, 3, 4, 4, 5, 5, 5, 5, 4, 3, 4, 3, 3, 4, 4
+    ]
+
+    trajs[3] == [
+        3, 3, 3, 3, 3, 3, 3, 5, 4, 4, 5, 4, 4, 5, 4, 5, 5, 5, 4, 5,
+        4, 4, 5, 4, 5, 5, 4, 5, 5, 5, 4, 5, 5, 4, 5, 4, 5, 4, 5, 5,
+        6, 6, 6, 5, 6, 6, 6, 5, 5, 5, 5, 5, 4, 4, 5, 6, 6, 6, 7, 6, 7
+    ]
+
+    assert os.path.exists('state_trajs_for_sim.npy')
+    os.remove('state_trajs_for_sim.npy')
+
+    # Test 2: Test for discontinuous time series
+    # Here, for sim_2, we exclude the last 5 lines for the dhdl.xvg file in iteration_1 to create a gap
+    save_and_exclude(f'{folder}/sim_2/iteration_1/dhdl.xvg', 5)
+    os.rename(f'{folder}/sim_2/iteration_1/dhdl.xvg', f'{folder}/sim_2/iteration_1/dhdl_temp.xvg')
+    os.rename(f'{folder}/sim_2/iteration_1/dhdl_short.xvg', f'{folder}/sim_2/iteration_1/dhdl.xvg')
+
+    match_str = 'The first frame of iteration 2 in replica 2 is not continuous with the last frame of the previous iteration. '
+    match_str += f'Please check files {folder}/sim_2/iteration_1/dhdl.xvg and {folder}/sim_2/iteration_2/dhdl.xvg'
+    with pytest.raises(ValueError, match=match_str):
+        trajs = analyze_traj.stitch_time_series_for_sim(files, shifts)
+
+    # Delete dhdl_short.xvg and rename dhdl_temp.xvg back to dhdl.xvg
+    os.remove(f'{folder}/sim_2/iteration_1/dhdl.xvg')
+    os.rename(f'{folder}/sim_2/iteration_1/dhdl_temp.xvg', f'{folder}/sim_2/iteration_1/dhdl.xvg')
+
+
+@patch('ensemble_md.analysis.analyze_traj.utils.run_gmx_cmd')
+def test_stitch_xtc_trajs(mock_gmx):
+    # Here we mock run_gmx_cmd so we don't need to call GROMACS and don't need example xtc files.
+    folder = os.path.join(input_path, 'dhdl/simulation_example')
+    files = [[f'{folder}/sim_{i}/iteration_{j}/md.xtc' for j in range(3)] for i in range(4)]
+    rep_trajs = np.array([[0, 0, 1], [1, 1, 0], [2, 2, 2], [3, 3, 3]])
+
+    mock_rtn, mock_stdout, mock_stderr = MagicMock(), MagicMock(), MagicMock()
+    mock_gmx.return_value = mock_rtn, mock_stdout, mock_stderr
+
+    analyze_traj.stitch_xtc_trajs('gmx', files, rep_trajs)
+
+    args_1 = ['gmx', 'trjcat', '-f', f'{folder}/sim_0/iteration_0/md.xtc', f'{folder}/sim_0/iteration_1/md.xtc', f'{folder}/sim_1/iteration_2/md.xtc', '-o', 'traj_0.xtc']  # noqa: E501
+    args_2 = ['gmx', 'trjcat', '-f', f'{folder}/sim_1/iteration_0/md.xtc', f'{folder}/sim_1/iteration_1/md.xtc', f'{folder}/sim_0/iteration_2/md.xtc', '-o', 'traj_1.xtc']  # noqa: E501
+    args_3 = ['gmx', 'trjcat', '-f', f'{folder}/sim_2/iteration_0/md.xtc', f'{folder}/sim_2/iteration_1/md.xtc', f'{folder}/sim_2/iteration_2/md.xtc', '-o', 'traj_2.xtc']  # noqa: E501
+    args_4 = ['gmx', 'trjcat', '-f', f'{folder}/sim_3/iteration_0/md.xtc', f'{folder}/sim_3/iteration_1/md.xtc', f'{folder}/sim_3/iteration_2/md.xtc', '-o', 'traj_3.xtc']  # noqa: E501
+
+    assert mock_gmx.call_count == 4
+    assert mock_gmx.call_args_list[0][0][0] == args_1
+    assert mock_gmx.call_args_list[1][0][0] == args_2
+    assert mock_gmx.call_args_list[2][0][0] == args_3
+    assert mock_gmx.call_args_list[3][0][0] == args_4
 
 
 def test_convert_npy2xvg():

From 91806a83cc8cd4a89697808181983eb4b1c0f5da Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 9 Apr 2024 04:47:53 +0800
Subject: [PATCH 37/41] Added an example run_REXEE_log.txt and a unit test for
 get_swaps

---
 ensemble_md/analysis/analyze_traj.py     |   6 +-
 ensemble_md/tests/data/run_REXEE_log.txt | 363 +++++++++++++++++++++++
 ensemble_md/tests/test_analyze_traj.py   |  22 +-
 3 files changed, 384 insertions(+), 7 deletions(-)
 create mode 100644 ensemble_md/tests/data/run_REXEE_log.txt

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index 7db40204..996fdda1 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -105,7 +105,7 @@ def stitch_time_series(files, rep_trajs, shifts=None, dhdl=True, col_idx=-1, sav
                 traj = list(np.array(traj) + shifts[shift_idx])
             else:
                 traj = np.loadtxt(files_sorted[i][j], comments=['#', '@'])[:, col_idx]
-            
+
             if j != n_iter - 1:
                 traj = traj[:-1]
 
@@ -180,7 +180,7 @@ def stitch_time_series_for_sim(files, shifts, dhdl=True, col_idx=-1, save=True):
                 traj = traj[:-1]  # remove the last frame, which is the same as the first of the next time series.
 
             trajs[i].extend(traj)
-        
+
         # All segments for the same replica should have the same shift
         trajs[i] = list(np.array(trajs[i]) + shifts[i])
 
@@ -835,7 +835,7 @@ def plot_g_vecs(g_vecs, refs=None, refs_err=None, plot_rmse=True):
 
 def get_swaps(REXEE_log='run_REXEE_log.txt'):
     """
-    For each replica, identifies the states where exchanges were proposed and accepted.
+    For each replica, identifies the states involved in proposed and accepted exchanges.
     (Todo: We should be able to only use :code:`rep_trajs.npy` and :code:`state_trajs.npy`
     instead of parsing the REXEE log file to reach the same goal.)
 
diff --git a/ensemble_md/tests/data/run_REXEE_log.txt b/ensemble_md/tests/data/run_REXEE_log.txt
new file mode 100644
index 00000000..471996a0
--- /dev/null
+++ b/ensemble_md/tests/data/run_REXEE_log.txt
@@ -0,0 +1,363 @@
+Current time: 19/06/2023 04:08:37
+Command line: /jet/home/wehs7661/.local/python3.9/bin/run_EEXE
+
+Important parameters of EXEE
+============================
+Python version: 3.9.12 (main, Feb 28 2023, 14:59:18) 
+[GCC 10.2.0]
+GROMACS executable: /jet/home/wehs7661/pkgs/gromacs/2022.5/bin/gmx
+GROMACS version: 2022.5-dev-20230428-fdf57150ad
+ensemble_md version: 0.6.0+40.gba0d9e6.dirty
+Simulation inputs: anthracene.gro, anthracene.top, expanded.mdp
+Verbose log file: True
+Proposal scheme: exhaustive
+Acceptance scheme for swapping simulations: metropolis
+Whether to perform weight combination: False
+Histogram cutoff: -1
+Number of replicas: 4
+Number of iterations: 12500
+Number of attempted swaps in one exchange interval: N^3
+Length of each replica: 4.0 ps
+Frequency for checkpointing: 100 iterations
+Total number of states: 8
+Additional grompp arguments: None
+Additional runtime arguments: {'-nt': '16', '-ntmpi': '1'}
+Alchemical ranges of each replica in EEXE:
+  - Replica 0: States [0, 1, 2, 3, 4]
+  - Replica 1: States [1, 2, 3, 4, 5]
+  - Replica 2: States [2, 3, 4, 5, 6]
+  - Replica 3: States [3, 4, 5, 6, 7]
+
+Iteration 0:  0.0 -  4.0 ps
+===========================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 1, (coul, vdw) =                 (0.18,)
+  Simulation 1: Global state 2, (coul, vdw) =                 (0.42,)
+  Simulation 2: Global state 3, (coul, vdw) =                 (0.57,)
+  Simulation 3: Global state 4, (coul, vdw) =                 (0.68,)
+
+Parsing sim_0/iteration_0/md.log ...
+Parsing sim_1/iteration_0/md.log ...
+Parsing sim_2/iteration_0/md.log ...
+Parsing sim_3/iteration_0/md.log ...
+
+Swappable pairs: [(0, 1), (1, 2), (2, 3)]
+
+Proposed swap: (2, 3)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 1.91 kT, U^j_m - U^j_n = 6.93 kT, Total dU: 8.84 kT
+  g^i_n - g^i_m = 0.43 kT, g^j_m - g^j_n = -0.43 kT, Total dg: 0.00 kT
+  Acceptance rate: 0.000 / Random number drawn: 0.212
+  Swap rejected! 
+  Current list of configurations: [0, 1, 2, 3]
+
+Remaining swappable pairs: [(0, 1)]
+
+Proposed swap: (0, 1)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 5.12 kT, U^j_m - U^j_n = 1.53 kT, Total dU: 6.65 kT
+  g^i_n - g^i_m = 4.94 kT, g^j_m - g^j_n = -4.94 kT, Total dg: 0.00 kT
+  Acceptance rate: 0.001 / Random number drawn: 0.958
+  Swap rejected! 
+  Current list of configurations: [0, 1, 2, 3]
+
+The finally adopted swap pattern: [0, 1, 2, 3]
+The list of configurations sampled in each replica in the next iteration: [0, 1, 2, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 1:  4.0 -  8.0 ps
+===========================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 1, (coul, vdw) =                 (0.18,)
+  Simulation 1: Global state 1, (coul, vdw) =                 (0.18,)
+  Simulation 2: Global state 3, (coul, vdw) =                 (0.57,)
+  Simulation 3: Global state 6, (coul, vdw) =                 (0.86,)
+
+Parsing sim_0/iteration_1/md.log ...
+Parsing sim_1/iteration_1/md.log ...
+Parsing sim_2/iteration_1/md.log ...
+Parsing sim_3/iteration_1/md.log ...
+
+Swappable pairs: [(0, 1), (2, 3)]
+
+Proposed swap: (0, 1)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 0.00 kT, U^j_m - U^j_n = 0.00 kT, Total dU: 0.00 kT
+  g^i_n - g^i_m = 0.00 kT, g^j_m - g^j_n = 0.00 kT, Total dg: 0.00 kT
+  Acceptance rate: 1.000 / Random number drawn: 0.047
+  Swap accepted! 
+  Current list of configurations: [1, 0, 2, 3]
+
+Remaining swappable pairs: [(2, 3)]
+
+Proposed swap: (2, 3)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 8.64 kT, U^j_m - U^j_n = 41.64 kT, Total dU: 50.28 kT
+  g^i_n - g^i_m = -5.23 kT, g^j_m - g^j_n = 5.23 kT, Total dg: 0.00 kT
+  Acceptance rate: 0.000 / Random number drawn: 0.347
+  Swap rejected! 
+  Current list of configurations: [1, 0, 2, 3]
+
+The finally adopted swap pattern: [1, 0, 2, 3]
+The list of configurations sampled in each replica in the next iteration: [1, 0, 2, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 2:  8.0 -  12.0 ps
+============================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 1, (coul, vdw) =                 (0.18,)
+  Simulation 1: Global state 5, (coul, vdw) =                 (0.76,)
+  Simulation 2: Global state 4, (coul, vdw) =                 (0.68,)
+  Simulation 3: Global state 7, (coul, vdw) =                 (1.0,)
+
+Parsing sim_0/iteration_2/md.log ...
+Parsing sim_1/iteration_2/md.log ...
+Parsing sim_2/iteration_2/md.log ...
+Parsing sim_3/iteration_2/md.log ...
+
+n_ex is set back to 1 since there is only 1 swappable pair.
+Swappable pairs: [(1, 2)]
+
+Proposed swap: (1, 2)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 9.19 kT, U^j_m - U^j_n = -2.31 kT, Total dU: 6.88 kT
+  g^i_n - g^i_m = 1.45 kT, g^j_m - g^j_n = -1.45 kT, Total dg: 0.00 kT
+  Acceptance rate: 0.001 / Random number drawn: 0.303
+  Swap rejected! 
+  Current list of configurations: [1, 0, 2, 3]
+
+The finally adopted swap pattern: [0, 1, 2, 3]
+The list of configurations sampled in each replica in the next iteration: [1, 0, 2, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 3:  12.0 -  16.0 ps
+=============================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 0, (coul, vdw) =                 (0.0,)
+  Simulation 1: Global state 5, (coul, vdw) =                 (0.76,)
+  Simulation 2: Global state 6, (coul, vdw) =                 (0.86,)
+  Simulation 3: Global state 7, (coul, vdw) =                 (1.0,)
+
+Parsing sim_0/iteration_3/md.log ...
+Parsing sim_1/iteration_3/md.log ...
+Parsing sim_2/iteration_3/md.log ...
+Parsing sim_3/iteration_3/md.log ...
+
+Swappable pairs: []
+
+The finally adopted swap pattern: [0, 1, 2, 3]
+The list of configurations sampled in each replica in the next iteration: [1, 0, 2, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 4:  16.0 -  20.0 ps
+=============================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 1, (coul, vdw) =                 (0.18,)
+  Simulation 1: Global state 1, (coul, vdw) =                 (0.18,)
+  Simulation 2: Global state 3, (coul, vdw) =                 (0.57,)
+  Simulation 3: Global state 6, (coul, vdw) =                 (0.86,)
+
+Parsing sim_0/iteration_4/md.log ...
+Parsing sim_1/iteration_4/md.log ...
+Parsing sim_2/iteration_4/md.log ...
+Parsing sim_3/iteration_4/md.log ...
+
+Swappable pairs: [(0, 1), (2, 3)]
+
+Proposed swap: (0, 1)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 0.00 kT, U^j_m - U^j_n = 0.00 kT, Total dU: 0.00 kT
+  g^i_n - g^i_m = 0.00 kT, g^j_m - g^j_n = 0.00 kT, Total dg: 0.00 kT
+  Acceptance rate: 1.000 / Random number drawn: 0.601
+  Swap accepted! 
+  Current list of configurations: [0, 1, 2, 3]
+
+Remaining swappable pairs: [(2, 3)]
+
+Proposed swap: (2, 3)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 7.79 kT, U^j_m - U^j_n = 52.74 kT, Total dU: 60.53 kT
+  g^i_n - g^i_m = -5.23 kT, g^j_m - g^j_n = 5.23 kT, Total dg: 0.00 kT
+  Acceptance rate: 0.000 / Random number drawn: 0.530
+  Swap rejected! 
+  Current list of configurations: [0, 1, 2, 3]
+
+The finally adopted swap pattern: [1, 0, 2, 3]
+The list of configurations sampled in each replica in the next iteration: [0, 1, 2, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 5:  20.0 -  24.0 ps
+=============================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 2, (coul, vdw) =                 (0.42,)
+  Simulation 1: Global state 4, (coul, vdw) =                 (0.68,)
+  Simulation 2: Global state 2, (coul, vdw) =                 (0.42,)
+  Simulation 3: Global state 6, (coul, vdw) =                 (0.86,)
+
+Parsing sim_0/iteration_5/md.log ...
+Parsing sim_1/iteration_5/md.log ...
+Parsing sim_2/iteration_5/md.log ...
+Parsing sim_3/iteration_5/md.log ...
+
+Swappable pairs: [(0, 1), (0, 2), (1, 2)]
+
+Proposed swap: (0, 2)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 0.00 kT, U^j_m - U^j_n = 0.00 kT, Total dU: 0.00 kT
+  g^i_n - g^i_m = 0.00 kT, g^j_m - g^j_n = 0.00 kT, Total dg: 0.00 kT
+  Acceptance rate: 1.000 / Random number drawn: 0.883
+  Swap accepted! 
+  Current list of configurations: [2, 1, 0, 3]
+
+Remaining swappable pairs: []
+
+The finally adopted swap pattern: [2, 1, 0, 3]
+The list of configurations sampled in each replica in the next iteration: [2, 1, 0, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 6:  24.0 -  28.0 ps
+=============================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 0, (coul, vdw) =                 (0.0,)
+  Simulation 1: Global state 4, (coul, vdw) =                 (0.68,)
+  Simulation 2: Global state 2, (coul, vdw) =                 (0.42,)
+  Simulation 3: Global state 7, (coul, vdw) =                 (1.0,)
+
+Parsing sim_0/iteration_6/md.log ...
+Parsing sim_1/iteration_6/md.log ...
+Parsing sim_2/iteration_6/md.log ...
+Parsing sim_3/iteration_6/md.log ...
+
+n_ex is set back to 1 since there is only 1 swappable pair.
+Swappable pairs: [(1, 2)]
+
+Proposed swap: (1, 2)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 11.12 kT, U^j_m - U^j_n = 6.74 kT, Total dU: 17.86 kT
+  g^i_n - g^i_m = -2.63 kT, g^j_m - g^j_n = 2.63 kT, Total dg: 0.00 kT
+  Acceptance rate: 0.000 / Random number drawn: 0.041
+  Swap rejected! 
+  Current list of configurations: [2, 1, 0, 3]
+
+The finally adopted swap pattern: [0, 1, 2, 3]
+The list of configurations sampled in each replica in the next iteration: [2, 1, 0, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 7:  28.0 -  32.0 ps
+=============================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 0, (coul, vdw) =                 (0.0,)
+  Simulation 1: Global state 1, (coul, vdw) =                 (0.18,)
+  Simulation 2: Global state 4, (coul, vdw) =                 (0.68,)
+  Simulation 3: Global state 6, (coul, vdw) =                 (0.86,)
+
+Parsing sim_0/iteration_7/md.log ...
+Parsing sim_1/iteration_7/md.log ...
+Parsing sim_2/iteration_7/md.log ...
+Parsing sim_3/iteration_7/md.log ...
+
+n_ex is set back to 1 since there is only 1 swappable pair.
+Swappable pairs: [(2, 3)]
+
+Proposed swap: (2, 3)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 0.02 kT, U^j_m - U^j_n = 17.39 kT, Total dU: 17.41 kT
+  g^i_n - g^i_m = -5.66 kT, g^j_m - g^j_n = 5.66 kT, Total dg: 0.00 kT
+  Acceptance rate: 0.000 / Random number drawn: 0.494
+  Swap rejected! 
+  Current list of configurations: [2, 1, 0, 3]
+
+The finally adopted swap pattern: [0, 1, 2, 3]
+The list of configurations sampled in each replica in the next iteration: [2, 1, 0, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 8:  32.0 -  36.0 ps
+=============================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+Below are the final states being visited:
+  Simulation 0: Global state 1, (coul, vdw) =                 (0.18,)
+  Simulation 1: Global state 2, (coul, vdw) =                 (0.42,)
+  Simulation 2: Global state 2, (coul, vdw) =                 (0.42,)
+  Simulation 3: Global state 7, (coul, vdw) =                 (1.0,)
+
+Parsing sim_0/iteration_8/md.log ...
+Parsing sim_1/iteration_8/md.log ...
+Parsing sim_2/iteration_8/md.log ...
+Parsing sim_3/iteration_8/md.log ...
+
+Swappable pairs: [(0, 1), (1, 2)]
+
+Proposed swap: (1, 2)
+  Proposing a move from (x^i_m, x^j_n) to (x^i_n, x^j_m) ...
+  U^i_n - U^i_m = 0.00 kT, U^j_m - U^j_n = 0.00 kT, Total dU: 0.00 kT
+  g^i_n - g^i_m = 0.00 kT, g^j_m - g^j_n = 0.00 kT, Total dg: 0.00 kT
+  Acceptance rate: 1.000 / Random number drawn: 0.254
+  Swap accepted! 
+  Current list of configurations: [2, 0, 1, 3]
+
+Remaining swappable pairs: []
+
+The finally adopted swap pattern: [0, 2, 1, 3]
+The list of configurations sampled in each replica in the next iteration: [2, 0, 1, 3]
+
+Note: No histogram correction will be performed.
+Note: No weight combination will be performed.
+
+Iteration 9:  36.0 -  40.0 ps
+=============================
+Generating a TPR file on rank 0 ...
+Running an EXE simulation on rank 0 ...
+
+----- Saving .npy files to checkpoint the simulation ---
+
+Summary of the simulation ensemble
+==================================
+Simulation status:
+- Rep 0: The weights were fixed throughout the simulation.
+- Rep 1: The weights were fixed throughout the simulation.
+- Rep 2: The weights were fixed throughout the simulation.
+- Rep 3: The weights were fixed throughout the simulation.
+
+Time elapsed: xx hour(s) xx minute(s) xx second(s)
\ No newline at end of file
diff --git a/ensemble_md/tests/test_analyze_traj.py b/ensemble_md/tests/test_analyze_traj.py
index d29d040e..73b0dfb2 100644
--- a/ensemble_md/tests/test_analyze_traj.py
+++ b/ensemble_md/tests/test_analyze_traj.py
@@ -12,7 +12,6 @@
 """
 import os
 import pytest
-import shutil
 import numpy as np
 from unittest.mock import patch, MagicMock
 from ensemble_md.analysis import analyze_traj
@@ -128,7 +127,7 @@ def test_stitch_time_series_for_sim():
     trajs = analyze_traj.stitch_time_series_for_sim(files, shifts)
 
     trajs[0] == [
-        0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 
+        0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,
         1, 1, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 1, 0, 1, 1,
         1, 1, 1, 0, 1, 1, 1, 0, 1, 2, 0, 2, 1, 1, 0, 0, 1, 0, 1, 0, 1
     ]
@@ -160,7 +159,7 @@ def test_stitch_time_series_for_sim():
     os.rename(f'{folder}/sim_2/iteration_1/dhdl.xvg', f'{folder}/sim_2/iteration_1/dhdl_temp.xvg')
     os.rename(f'{folder}/sim_2/iteration_1/dhdl_short.xvg', f'{folder}/sim_2/iteration_1/dhdl.xvg')
 
-    match_str = 'The first frame of iteration 2 in replica 2 is not continuous with the last frame of the previous iteration. '
+    match_str = 'The first frame of iteration 2 in replica 2 is not continuous with the last frame of the previous iteration. '  # noqa: E501
     match_str += f'Please check files {folder}/sim_2/iteration_1/dhdl.xvg and {folder}/sim_2/iteration_2/dhdl.xvg'
     with pytest.raises(ValueError, match=match_str):
         trajs = analyze_traj.stitch_time_series_for_sim(files, shifts)
@@ -788,7 +787,22 @@ def test_plot_g_vecs(mock_plt):
 
 
 def test_get_swaps():
-    pass
+    input_file = os.path.join('ensemble_md/tests/data', 'run_REXEE_log.txt')
+    proposed_swaps, accepted_swaps = analyze_traj.get_swaps(input_file)
+
+    assert proposed_swaps == [
+        {0: 0, 1: 3, 2: 1, 3: 0, 4: 0},
+        {1: 2, 2: 2, 3: 0, 4: 1, 5: 1},
+        {2: 3, 3: 3, 4: 2, 5: 0, 6: 0},
+        {3: 0, 4: 1, 5: 0, 6: 3, 7: 0},
+    ]
+
+    assert accepted_swaps == [
+        {0: 0, 1: 2, 2: 1, 3: 0, 4: 0},
+        {1: 2, 2: 1, 3: 0, 4: 0, 5: 0},
+        {2: 2, 3: 0, 4: 0, 5: 0, 6: 0},
+        {3: 0, 4: 0, 5: 0, 6: 0, 7: 0},
+    ]
 
 
 def test_plot_swaps():

From 42fa84af657b7debb5a78f08482fac6bf2c4980e Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 9 Apr 2024 05:22:16 +0800
Subject: [PATCH 38/41] Added a unit test for plot_swaps

---
 ensemble_md/analysis/analyze_traj.py   | 14 ++--
 ensemble_md/tests/test_analyze_traj.py | 93 +++++++++++++++++++++++++-
 2 files changed, 101 insertions(+), 6 deletions(-)

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index 996fdda1..e49ab3e0 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -835,7 +835,7 @@ def plot_g_vecs(g_vecs, refs=None, refs_err=None, plot_rmse=True):
 
 def get_swaps(REXEE_log='run_REXEE_log.txt'):
     """
-    For each replica, identifies the states involved in proposed and accepted exchanges.
+    For each replica, identifies the states involved in proposed and accepted swaps.
     (Todo: We should be able to only use :code:`rep_trajs.npy` and :code:`state_trajs.npy`
     instead of parsing the REXEE log file to reach the same goal.)
 
@@ -850,12 +850,12 @@ def get_swaps(REXEE_log='run_REXEE_log.txt'):
         A list of dictionaries showing where the swaps were proposed in
         each replica. Each dictionary (corresponding to one replica) have
         keys being the global state indices and values being the number of
-        proposed swaps that occurred in the state indicated by the key.
+        proposed swaps that involved the state indicated by the key.
     accepted_swaps : list
         A list of dictionaries showing where the swaps were accepted in
         each replica. Each dictionary (corresponding to one replica) have
         keys being the global state indices and values being the number of
-        accepted swaps that occurred in the state indicated by the key.
+        accepted swaps that involved the state indicated by the key.
     """
     f = open(REXEE_log, 'r')
     lines = f.readlines()
@@ -963,10 +963,16 @@ def plot_swaps(swaps, swap_type='', stack=True, figsize=None):
         if i == n_sim - 1:
             bounds[1] += 0.5
         plt.fill_betweenx([y_min, y_max], x1=bounds[1] + 0.5, x2=bounds[0] - 0.5, color=colors[i], alpha=0.1, zorder=0)
+
     plt.xlim([lower_bound, upper_bound])
     # plt.ylim([y_min, y_max])
+
     plt.xlabel('State')
-    plt.ylabel(f'Number of {swap_type} swaps')
+    if swap_type == '':
+        plt.ylabel('Number of swaps')
+    else:
+        plt.ylabel(f'Number of {swap_type} swaps')
+
     plt.grid()
     plt.legend()
     plt.tight_layout()
diff --git a/ensemble_md/tests/test_analyze_traj.py b/ensemble_md/tests/test_analyze_traj.py
index 73b0dfb2..aa8f1eee 100644
--- a/ensemble_md/tests/test_analyze_traj.py
+++ b/ensemble_md/tests/test_analyze_traj.py
@@ -805,8 +805,97 @@ def test_get_swaps():
     ]
 
 
-def test_plot_swaps():
-    pass
+@patch('ensemble_md.analysis.analyze_traj.plt')
+def test_plot_swaps(mock_plt):
+    swaps = [
+        {0: 0, 1: 3, 2: 1, 3: 0, 4: 0},
+        {1: 2, 2: 2, 3: 0, 4: 1, 5: 1},
+        {2: 3, 3: 3, 4: 2, 5: 0, 6: 0},
+        {3: 0, 4: 1, 5: 0, 6: 3, 7: 0},
+    ]
+
+    # Test 1: The case not specifying the swap_type
+    cmap = mock_plt.cm.ocean
+    colors = [cmap(i) for i in np.arange(8) / 8]
+    mock_fig, mock_ax = MagicMock(), MagicMock()
+    mock_plt.figure.return_value = mock_fig
+    mock_fig.add_subplot.return_value = mock_ax
+
+    mock_min, mock_max = MagicMock(), MagicMock()
+    mock_ax.get_ylim.return_value = (mock_min, mock_max)
+
+    analyze_traj.plot_swaps(swaps, stack=True)
+
+    mock_plt.figure.assert_called_once_with(figsize=(6.4, 4.8))
+    mock_fig.add_subplot.assert_called_once_with(111)
+    mock_plt.bar.assert_called()
+    mock_plt.xticks.assert_called_once_with(range(8))
+    mock_plt.fill_betweenx.assert_called()
+    mock_plt.xlim.assert_called_once_with([-0.5, 7.5])
+    mock_plt.xlabel.assert_called_once_with('State')
+    mock_plt.ylabel.assert_called_once_with('Number of swaps')
+    mock_plt.grid.assert_called_once()
+    mock_plt.legend.assert_called_once()
+    mock_plt.tight_layout.assert_called_once()
+    mock_plt.savefig.assert_called_once_with('swaps.png', dpi=600)
+
+    counts_list = [
+        [0, 3, 1, 0, 0, 0, 0, 0],
+        [0, 2, 2, 0, 1, 1, 0, 0],
+        [0, 0, 3, 3, 2, 0, 0, 0],
+        [0, 0, 0, 0, 1, 0, 3, 0],
+    ]
+    assert mock_plt.bar.call_count == 4
+    assert [mock_plt.bar.call_args_list[i][0][0] for i in range(4)] == [range(8)] * 4
+    assert [mock_plt.bar.call_args_list[i][0][1] for i in range(4)] == [counts_list[i] for i in range(4)]
+    assert mock_plt.bar.call_args_list[0][1] == {
+        'align': 'center',
+        'width': 1,
+        'color': colors[0],
+        'edgecolor': 'black',
+        'label': 'Replica 0',
+        'alpha': 0.5,
+        'bottom': [0, 0, 0, 0, 0, 0, 0, 0]
+    }
+    assert mock_plt.bar.call_args_list[1][1] == {
+        'align': 'center',
+        'width': 1,
+        'color': colors[1],
+        'edgecolor': 'black',
+        'label': 'Replica 1',
+        'alpha': 0.5,
+        'bottom': [0, 3, 1, 0, 0, 0, 0, 0]
+    }
+    assert mock_plt.bar.call_args_list[2][1] == {
+        'align': 'center',
+        'width': 1,
+        'color': colors[2],
+        'edgecolor': 'black',
+        'label': 'Replica 2',
+        'alpha': 0.5,
+        'bottom': [0, 5, 3, 0, 1, 1, 0, 0]
+    }
+    assert mock_plt.bar.call_args_list[3][1] == {
+        'align': 'center',
+        'width': 1,
+        'color': colors[3],
+        'edgecolor': 'black',
+        'label': 'Replica 3',
+        'alpha': 0.5,
+        'bottom': [0, 5, 6, 3, 3, 1, 0, 0]
+    }
+
+    # Below we only check the keyword arguments of the fill_betweenx calls
+    assert mock_plt.fill_betweenx.call_args_list[0][1] == {'x1': 4.5, 'x2': -1, 'color': colors[0], 'alpha': 0.1, 'zorder': 0}  # noqa: E501
+    assert mock_plt.fill_betweenx.call_args_list[1][1] == {'x1': 5.5, 'x2': 0.5, 'color': colors[1], 'alpha': 0.1, 'zorder': 0}  # noqa: E501
+    assert mock_plt.fill_betweenx.call_args_list[2][1] == {'x1': 6.5, 'x2': 1.5, 'color': colors[2], 'alpha': 0.1, 'zorder': 0}  # noqa: E501
+    assert mock_plt.fill_betweenx.call_args_list[3][1] == {'x1': 8, 'x2': 2.5, 'color': colors[3], 'alpha': 0.1, 'zorder': 0}  # noqa: E501
+
+    # Test 2: The case specifying the swap_type
+    mock_plt.reset_mock()
+    analyze_traj.plot_swaps(swaps, swap_type='proposed')
+    mock_plt.ylabel.assert_called_with('Number of proposed swaps')
+    mock_plt.savefig.assert_called_once_with('proposed_swaps.png', dpi=600)
 
 
 def test_get_g_evolution():

From f18c1add17cb71b7f5749e51d80cf631ee4dffea Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 9 Apr 2024 08:50:38 +0800
Subject: [PATCH 39/41] Tweaked get_g_evolution and get_dg_evolution and added
 unit tests for them

---
 ensemble_md/analysis/analyze_traj.py   |  40 +++++---
 ensemble_md/tests/test_analyze_traj.py | 125 ++++++++++++++++++++++++-
 2 files changed, 148 insertions(+), 17 deletions(-)

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index e49ab3e0..8d178a10 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -982,7 +982,7 @@ def plot_swaps(swaps, swap_type='', stack=True, figsize=None):
         plt.savefig(f'{swap_type}_swaps.png', dpi=600)
 
 
-def get_g_evolution(log_files, N_states, avg_frac=0, avg_from_last_update=False):
+def get_g_evolution(log_files, start_state, end_state, avg_frac=0, avg_from_last_update=False):
     """
     For weight-updating simulations, gets the time series of the alchemical
     weights of all states. Note that this funciton is only suitable for analyzing
@@ -992,29 +992,38 @@ def get_g_evolution(log_files, N_states, avg_frac=0, avg_from_last_update=False)
     Parameters
     ----------
     log_files : list
-        The list of log file names.
-    N_states : int
-        The total number of states in the whole alchemical range.
+        The list of log file names. If multiple log files are provided (for a REXEE
+        simulation), please make sure the files are in the correct order.
+    start_state : int
+        The index of the first state of interest. The index starts from 0.
+    end_state : int
+        The index of the last state of interest. The index starts from 0. For example, if :code:`start_state`
+        is set to 1 and :code:`end_state` is set to 3, then the weight evolution for
+        states 1, 2 and 3 will be extracted.
     avg_frac : float
         The fraction of the last part of the simulation to be averaged. The
         default is 0, which means no averaging. Note that this parameter is
         ignored if :code:`avg_from_last_update` is :code:`True`.
     avg_from_last_update : bool
-        Whether to average from the last update of wl-delta. If False, the
-        averaging will be from the beginning of the simulation.
+        Whether to average from the last update of wl-delta. If this option is set to False,
+        or the option is set to True but the wl-delta was not updated in the provided log
+        file(s), all weights will be used for averaging.
 
     Returns
     -------
     g_vecs_all : list
         The alchemical weights of all states as a function of time.
-        It should be a list of lists.
+        It should be a list of lists. For example, :code:`g_vecs_all[i]` should be the
+        alchemical weights of all states at time frame with index :code:`i`.
+        Weights after equilibration are not included.
     g_vecs_avg : list
         The alchemical weights of all states averaged over the last part of
         the simulation. If :code:`avg_frac` is 0, :code:`None` will be returned.
+        Note that weights after equilibration are not considered.
     g_vecs_err : list
         The errors of the alchemical weights of all states averaged over the
         last part of the simulation. If :code:`avg_frac` is 0 and :code:`avg_from_last_update`
-        is :code:`False`, :code:`None` will be returned.
+        is :code:`False`, :code:`None` will be returned. Note that weights after equilibration are not considered.
     """
     g_vecs_all = []
     idx_updates = []  # the indices of the data points corresponding to the updates of wl-delta
@@ -1029,7 +1038,7 @@ def get_g_evolution(log_files, N_states, avg_frac=0, avg_from_last_update=False)
             n += 1
             if "Count   G(in kT)" in line:  # this line is lines[n]
                 w = []  # the list of weights at this time frame
-                for i in range(1, N_states + 1):
+                for i in range(start_state + 1, end_state + 1):
                     if "<<" in lines[n + i]:
                         w.append(float(lines[n + i].split()[-3]))
                     else:
@@ -1061,7 +1070,11 @@ def get_g_evolution(log_files, N_states, avg_frac=0, avg_from_last_update=False)
         if find_equil is True:
             idx_updates = idx_updates[:-1]
 
-        idx_last_update = idx_updates[-1]
+        if idx_updates == []:
+            print('Note: wl-delta was not updated in the provided log file(s) so all weights are used for averaging.')
+            idx_last_update = -1  # so that all weights are used for averaging
+        else:
+            idx_last_update = idx_updates[-1]
         g_vecs_avg = np.mean(g_vecs_all[idx_last_update + 1:], axis=0)
         g_vecs_err = np.std(g_vecs_all[idx_last_update + 1:], axis=0, ddof=1)
     else:
@@ -1084,7 +1097,8 @@ def get_dg_evolution(log_files, start_state, end_state):
     Parameters
     ----------
     log_files : list
-        The list of log file names.
+        The list of log file names. If multiple log files are provided (for a REXEE
+        simulation), please make sure the files are in the correct order.
     start_state : int
         The index of the state (starting from 0) whose weight is :math:`g_1`.
     end_state : int
@@ -1095,8 +1109,8 @@ def get_dg_evolution(log_files, start_state, end_state):
     dg : list
         A list of :math:`Δg` values.
     """
-    N_states = end_state - start_state + 1  # number of states for the range of insterest
-    g_vecs = get_g_evolution(log_files, N_states)
+    # N_states = end_state - start_state + 1  # number of states for the range of interest
+    g_vecs, _, _ = get_g_evolution(log_files, start_state, end_state)
     dg = [g_vecs[i][end_state] - g_vecs[i][start_state] for i in range(len(g_vecs))]
 
     return dg
diff --git a/ensemble_md/tests/test_analyze_traj.py b/ensemble_md/tests/test_analyze_traj.py
index aa8f1eee..c8d7932d 100644
--- a/ensemble_md/tests/test_analyze_traj.py
+++ b/ensemble_md/tests/test_analyze_traj.py
@@ -898,12 +898,129 @@ def test_plot_swaps(mock_plt):
     mock_plt.savefig.assert_called_once_with('proposed_swaps.png', dpi=600)
 
 
-def test_get_g_evolution():
-    pass
+@patch('ensemble_md.analysis.analyze_traj.np')
+def test_get_g_evolution(mock_np):
+    # Here instead of checking the values of g_vecs_avg and g_vecs_err, we check the inputs to np.mean and np.std
+
+    # Test 1: A standard case where two log files are passed with default parameters
+    g_vecs_all, g_vecs_avg, g_vecs_err = analyze_traj.get_g_evolution(
+        log_files=['ensemble_md/tests/data/log/EXE_0.log', 'ensemble_md/tests/data/log/EXE_1.log'],
+        start_state=0,
+        end_state=6
+    )
+    assert g_vecs_all == [
+        [0, 3.83101, 4.95736, 5.63808, 6.07220, 6.13408],
+        [0, 3.43101, 3.75736, 5.23808, 4.87220, 5.33408],
+        [0, 2.63101, 2.95736, 5.23808, 4.47220, 5.73408],
+        [0, 1.83101, 2.55736, 4.43808, 4.47220, 6.13408],
+        [0, 1.03101, 2.55736, 3.63808, 4.47220, 6.13408],
+        [0, 0.72635, 0.80707, 1.44120, 2.10308, 4.03106],
+        [0, 0.72635, 1.30707, 1.44120, 2.10308, 4.53106],
+        [0, 0.72635, 2.80707, 2.94120, 4.10308, 6.53106],
+        [0, 1.72635, 2.30707, 2.44120, 5.10308, 6.53106],
+        [0, 1.22635, 2.30707, 2.44120, 4.10308, 6.03106],
+    ]
+    assert g_vecs_avg is None
+    assert g_vecs_err is None
+
+    mock_np.mean.assert_not_called()
+    mock_np.std.assert_not_called()
+
+    # Test 2: Test the avg_frac parameter
+    mock_np.reset_mock()
+    _ = analyze_traj.get_g_evolution(
+        log_files=['ensemble_md/tests/data/log/EXE_0.log'],
+        start_state=0,
+        end_state=6,
+        avg_frac=0.5  # the last 2 out of 5 frames should be used
+    )
+
+    input_weights = np.array([[0, 1.83101, 2.55736, 4.43808, 4.47220, 6.13408], [0, 1.03101, 2.55736, 3.63808, 4.47220, 6.13408]])  # noqa: E501
+
+    assert mock_np.mean.call_count == 1
+    assert mock_np.std.call_count == 1
+    np.testing.assert_array_equal(mock_np.mean.call_args_list[0][0][0], input_weights)
+    np.testing.assert_array_equal(mock_np.std.call_args_list[0][0][0], input_weights)
+    assert mock_np.mean.call_args_list[0][1] == {'axis': 0}
+    assert mock_np.std.call_args_list[0][1] == {'axis': 0, 'ddof': 1}
+
+    # Test 3: Test avg_from_last_update but with a log file where wl-delta was not updated
+    mock_np.reset_mock()
+    _ = analyze_traj.get_g_evolution(
+        ['ensemble_md/tests/data/log/EXE_0.log'],
+        start_state=0,
+        end_state=6,
+        avg_frac=0.5,  # here we check if this option is ignored
+        avg_from_last_update=True  # wl-delta was not updated in EXE_0.log so all weights will be used for mean/std
+    )
+
+    input_weights = [
+        [0, 3.83101, 4.95736, 5.63808, 6.07220, 6.13408],
+        [0, 3.43101, 3.75736, 5.23808, 4.87220, 5.33408],
+        [0, 2.63101, 2.95736, 5.23808, 4.47220, 5.73408],
+        [0, 1.83101, 2.55736, 4.43808, 4.47220, 6.13408],
+        [0, 1.03101, 2.55736, 3.63808, 4.47220, 6.13408],
+    ]
 
+    assert mock_np.mean.call_count == 1
+    assert mock_np.std.call_count == 1
+    np.testing.assert_array_equal(mock_np.mean.call_args_list[0][0][0], input_weights)
+    np.testing.assert_array_equal(mock_np.std.call_args_list[0][0][0], input_weights)
+    assert mock_np.mean.call_args_list[0][1] == {'axis': 0}
+    assert mock_np.std.call_args_list[0][1] == {'axis': 0, 'ddof': 1}
+
+    # Test 4: Test avg_from_last_update and with a log file where wl-delta was indeed updated and
+    # the weights got equilibrated
+    mock_np.reset_mock()
+    g_vecs_all, g_vecs_avg, g_vecs_err = analyze_traj.get_g_evolution(
+        ['ensemble_md/tests/data/log/case2_1.log'],
+        start_state=0,
+        end_state=6,
+        avg_from_last_update=True
+    )
+
+    assert g_vecs_all == [
+        [0, 1.16453, 2.69258, 2.48480, 1.46220, 3.88607],  # 4.2 ps
+        [0, 1.16453, 1.49258, 2.48480, 1.06220, 3.88607],  # 4.4 ps
+        [0, 1.16453, 1.89258, 2.08480, 1.86220, 3.88607],  # 4.5 ps
+        [0, 1.16453, 1.89258, 2.08480, 1.86220, 4.68607],  # 4.8 ps
+        [0, 2.36453, 3.09258, 3.28480, 3.06220, 5.48607],  # 5.0 ps
+        [0, 2.68453, 4.13258, 4.32480, 4.42220, 6.52607],  # 5.2 ps, wl-delta updated
+        [0, 2.36453, 3.49258, 4.00480, 4.74220, 6.20607],  # 5.4 ps
+        [0, 2.36453, 3.17258, 3.36480, 3.78220, 4.92607],  # 5.6 ps
+        [0, 1.40453, 2.85258, 2.08480, 3.14220, 4.92607],  # 5.8 ps
+        [0, 1.40453, 2.53258, 2.40480, 3.14220, 5.56607],  # 6.0 ps, equilibrated at 6.04 ps
+        [0, 1.40453, 2.85258, 2.72480, 3.46220, 5.88607],  # 6.2 ps
+    ]
 
-def test_get_dg_evoluation():
-    pass
+    assert mock_np.mean.call_count == 1
+    assert mock_np.std.call_count == 1
+    np.testing.assert_array_equal(mock_np.mean.call_args_list[0][0][0], g_vecs_all[-6:])
+    np.testing.assert_array_equal(mock_np.std.call_args_list[0][0][0], g_vecs_all[-6:])
+    assert mock_np.mean.call_args_list[0][1] == {'axis': 0}
+    assert mock_np.std.call_args_list[0][1] == {'axis': 0, 'ddof': 1}
+
+
+@patch('ensemble_md.analysis.analyze_traj.get_g_evolution')
+def test_get_dg_evoluation(mock_fn):
+    mock_fn.return_value = ([
+        [0, 3.83101, 4.95736, 5.63808, 6.07220, 6.13408],
+        [0, 3.43101, 3.75736, 5.23808, 4.87220, 5.33408],
+        [0, 2.63101, 2.95736, 5.23808, 4.47220, 5.73408],
+        [0, 1.83101, 2.55736, 4.43808, 4.47220, 6.13408],
+        [0, 1.03101, 2.55736, 3.63808, 4.47220, 6.13408],
+    ], MagicMock(), MagicMock())
+
+    # Test 1
+    dg = analyze_traj.get_dg_evolution(['ensemble_md/tests/data/log/EXE_0.log'], 0, 3)
+    mock_fn.assert_called_once_with(['ensemble_md/tests/data/log/EXE_0.log'], 0, 3)
+    np.testing.assert_array_equal(dg, np.array([5.63808, 5.23808, 5.23808, 4.43808, 3.63808]))
+
+    # Test 2 (just different start_state/end_state values)
+    mock_fn.reset_mock()
+    dg = analyze_traj.get_dg_evolution(['ensemble_md/tests/data/log/EXE_0.log'], 1, 3)
+    mock_fn.assert_called_once_with(['ensemble_md/tests/data/log/EXE_0.log'], 1, 3)
+    np.testing.assert_array_almost_equal(dg, np.array([1.80707, 1.80707, 2.60707, 2.60707, 2.60707]))
 
 
 def test_plot_dg_evolution():

From 0765a20ebf0c4c9b0db9b6086c2bf8cbc86ac7e4 Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 9 Apr 2024 09:17:39 +0800
Subject: [PATCH 40/41] Added a unit test for plot_dg_evolution

---
 ensemble_md/analysis/analyze_traj.py   |  2 +-
 ensemble_md/tests/test_analyze_traj.py | 28 ++++++++++++++++++++++++--
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index 8d178a10..74927def 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -1116,7 +1116,7 @@ def get_dg_evolution(log_files, start_state, end_state):
     return dg
 
 
-def plot_dg_evolution(log_files, start_state, end_state, start_idx=0, end_idx=-1, dt_log=2):
+def plot_dg_evolution(log_files, start_state, end_state, start_idx=None, end_idx=None, dt_log=2):
     """
     For weight-updating simulations, plots the time series of the weight
     difference (:math:`Δg = g_2-g_1`) between the specified states.
diff --git a/ensemble_md/tests/test_analyze_traj.py b/ensemble_md/tests/test_analyze_traj.py
index c8d7932d..3d7347f7 100644
--- a/ensemble_md/tests/test_analyze_traj.py
+++ b/ensemble_md/tests/test_analyze_traj.py
@@ -1023,8 +1023,32 @@ def test_get_dg_evoluation(mock_fn):
     np.testing.assert_array_almost_equal(dg, np.array([1.80707, 1.80707, 2.60707, 2.60707, 2.60707]))
 
 
-def test_plot_dg_evolution():
-    pass
+@patch('ensemble_md.analysis.analyze_traj.plt')
+@patch('ensemble_md.analysis.analyze_traj.get_dg_evolution')
+def test_plot_dg_evolution(mock_fn, mock_plt):  # the outer decorator mock_plt should be the second parameter
+    # Test 1: Short dg
+    mock_fn.return_value = np.arange(10)
+    dg = analyze_traj.plot_dg_evolution(['log_0.log'], 1, 3)  # the value of log_files does not matter since the mocked value of dg is specified anyway  # noqa: E501
+    mock_fn.assert_called_once_with(['log_0.log'], 1, 3)
+    np.testing.assert_array_equal(dg, np.arange(10))
+    t = np.array([0, 2, 4, 6, 8, 10, 12, 14, 16, 18])
+
+    mock_plt.figure.assert_called()
+    np.testing.assert_array_equal(mock_plt.plot.call_args_list[0][0][0], t)
+    np.testing.assert_array_equal(mock_plt.plot.call_args_list[0][0][1], dg)
+    mock_plt.xlabel.assert_called_once_with('Time (ps)')
+    mock_plt.ylabel.assert_called_once_with(r'$\Delta g$')
+    mock_plt.grid.assert_called_once()
+    mock_plt.savefig.assert_called_once_with('dg_evolution.png', dpi=600)
+
+    # Test 2: Long dg
+    mock_fn.reset_mock()
+    mock_plt.reset_mock()
+    mock_fn.return_value = np.arange(20000)
+    dg = analyze_traj.plot_dg_evolution(['log_0.log'], 1, 3, start_idx=100)
+    mock_fn.assert_called_once_with(['log_0.log'], 1, 3)
+    np.testing.assert_array_equal(dg, np.arange(20000)[100:])
+    mock_plt.xlabel.assert_called_once_with('Time (ns)')
 
 
 def test_get_delta_w_updates():

From 0f9952bfc38945abb269a44b3482902ed1ac382f Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Tue, 9 Apr 2024 16:15:01 +0800
Subject: [PATCH 41/41] Tweaked get_delta_w_updates and added a unit test for
 it

---
 ensemble_md/analysis/analyze_traj.py   | 22 +++++++++----------
 ensemble_md/tests/test_analyze_traj.py | 30 ++++++++++++++++++++++++--
 2 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py
index 74927def..40778938 100644
--- a/ensemble_md/analysis/analyze_traj.py
+++ b/ensemble_md/analysis/analyze_traj.py
@@ -1172,8 +1172,8 @@ def get_delta_w_updates(log_file, plot=False):
     Returns
     -------
     t_updates : list
-        A list of time frames when the Wang-Landau incrementor is updated.
-    delta_updates : list
+        A list of time frames (in ns) when the Wang-Landau incrementor is updated.
+    delta_w_updates : list
         A list of the updated Wang-Landau incrementors. Should be the same
         length as :code:`t_updates`.
     equil : bool
@@ -1185,7 +1185,7 @@ def get_delta_w_updates(log_file, plot=False):
 
     # Get the parameters
     for l in lines:  # noqa: E741
-        if 'dt ' in l:
+        if ' dt ' in l:
             dt = float(l.split('=')[-1])
         if 'init-wl-delta ' in l:
             init_wl_delta = float(l.split('=')[-1])
@@ -1198,7 +1198,7 @@ def get_delta_w_updates(log_file, plot=False):
 
     # Start parsing the data
     n = -1
-    t_updates, delta_updates = [0], [init_wl_delta]
+    t_updates, delta_w_updates = [0], [init_wl_delta]
     for l in lines:  # noqa: E741
         n += 1
         if 'weights are now' in l:
@@ -1207,30 +1207,30 @@ def get_delta_w_updates(log_file, plot=False):
             # search the following 10 lines to find the Wang-Landau incrementor
             for i in range(10):
                 if 'Wang-Landau incrementor is:' in lines[n + i]:
-                    delta_updates.append(float(lines[n + i].split()[-1]))
+                    delta_w_updates.append(float(lines[n + i].split()[-1]))
                     break
         if 'Weights have equilibrated' in l:
             equil = True
             break
 
     if equil is True:
-        delta_updates.append(delta_updates[-1] * wl_scale)
+        delta_w_updates.append(delta_w_updates[-1] * wl_scale)
 
     # Plot the Wang-Landau incrementor as a function of time if requested
     # Note that between adjacen entries in t_updates, a horizontal line should be drawn.
     if plot is True:
         plt.figure()
         for i in range(len(t_updates) - 1):
-            plt.plot([t_updates[i], t_updates[i + 1]], [delta_updates[i], delta_updates[i]], c='C0')
-            plt.plot([t_updates[i + 1], t_updates[i + 1]], [delta_updates[i], delta_updates[i + 1]], c='C0')
+            plt.plot([t_updates[i], t_updates[i + 1]], [delta_w_updates[i], delta_w_updates[i]], c='C0')
+            plt.plot([t_updates[i + 1], t_updates[i + 1]], [delta_w_updates[i], delta_w_updates[i + 1]], c='C0')
 
         plt.text(0.65, 0.95, f'init_wl_delta: {init_wl_delta}', transform=plt.gca().transAxes)
-        plt.text(0.65, 0.9, f'wl-scale: {wl_scale}', transform=plt.gca().transAxes)
+        plt.text(0.65, 0.9, f'wl_scale: {wl_scale}', transform=plt.gca().transAxes)
         plt.text(0.65, 0.85, f'wl_delta_cutoff: {wl_delta_cutoff}', transform=plt.gca().transAxes)
 
         plt.xlabel('Time (ns)')
         plt.ylabel(r'Wang-Landau incrementor ($k_{B}T$)')
         plt.grid()
-        plt.savefig('delta_updates.png', dpi=600)
+        plt.savefig('delta_w_updates.png', dpi=600)
 
-    return t_updates, delta_updates, equil
+    return t_updates, delta_w_updates, equil
diff --git a/ensemble_md/tests/test_analyze_traj.py b/ensemble_md/tests/test_analyze_traj.py
index 3d7347f7..0690bd7e 100644
--- a/ensemble_md/tests/test_analyze_traj.py
+++ b/ensemble_md/tests/test_analyze_traj.py
@@ -1051,5 +1051,31 @@ def test_plot_dg_evolution(mock_fn, mock_plt):  # the outer decorator mock_plt s
     mock_plt.xlabel.assert_called_once_with('Time (ns)')
 
 
-def test_get_delta_w_updates():
-    pass
+@patch('ensemble_md.analysis.analyze_traj.plt')
+def test_get_delta_w_updates(mock_plt):
+    # Test 1
+    t_updates, delta_w_updates, equil = analyze_traj.get_delta_w_updates('ensemble_md/tests/data/log/case2_1.log', plot=True)  # noqa: E501
+    np.testing.assert_almost_equal(t_updates, [0, 0.00104, 0.00204])
+    assert delta_w_updates == [0.4, 0.32, 0.256]
+    assert equil is True
+
+    mock_plt.figure.assert_called_once()
+    mock_plt.plot.assert_called()
+    mock_plt.xlabel.assert_called_once_with('Time (ns)')
+    mock_plt.ylabel.assert_called_once_with(r'Wang-Landau incrementor ($k_{B}T$)')
+    mock_plt.grid.assert_called_once()
+    mock_plt.savefig.assert_called_once_with('delta_w_updates.png', dpi=600)
+
+    assert mock_plt.plot.call_count == 4
+    assert mock_plt.text.call_count == 3
+    assert mock_plt.plot.call_args_list[0][0][0] == t_updates[:2]
+    assert mock_plt.plot.call_args_list[0][0][1] == [0.4, 0.4]
+    assert mock_plt.plot.call_args_list[1][0][0] == [t_updates[1], t_updates[1]]
+    assert mock_plt.plot.call_args_list[1][0][1] == [0.4, 0.32]
+    assert mock_plt.plot.call_args_list[2][0][0] == t_updates[1:]
+    assert mock_plt.plot.call_args_list[2][0][1] == [0.32, 0.32]
+    assert mock_plt.plot.call_args_list[3][0][0] == [0.00204, 0.00204]
+    assert mock_plt.plot.call_args_list[3][0][1] == [0.32, 0.256]
+    assert mock_plt.text.call_args_list[0][0] == (0.65, 0.95, 'init_wl_delta: 0.4')
+    assert mock_plt.text.call_args_list[1][0] == (0.65, 0.9, 'wl_scale: 0.8')
+    assert mock_plt.text.call_args_list[2][0] == (0.65, 0.85, 'wl_delta_cutoff: 0.3')