From 6e24476be05a2ce8423e45c693cb7c815b59ee4c Mon Sep 17 00:00:00 2001 From: venkateshness Date: Sat, 19 Aug 2023 20:09:03 -0400 Subject: [PATCH 01/14] locally test script --- nigsp/operations/statistics.py | 264 +++++++++++++++++++++++++++++++++ 1 file changed, 264 insertions(+) create mode 100644 nigsp/operations/statistics.py diff --git a/nigsp/operations/statistics.py b/nigsp/operations/statistics.py new file mode 100644 index 0000000..788688e --- /dev/null +++ b/nigsp/operations/statistics.py @@ -0,0 +1,264 @@ +import numpy as np +from mne.stats import permutation_t_test +import logging +LGR = logging.getLogger(__name__) + +def ranktest(a, axis=None): + # Code adapted from `scipy`; ref: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rankdata.html + + """Assign ranks to data, dealing with ties appropriately. + + By default (``axis=None``), the data array is first flattened, and a flat + array of ranks is returned. Separately reshape the rank array to the + shape of the data array if desired (see Examples). + + Ranks begin at 1. The `method` argument controls how ranks are assigned + to equal values. See [1]_ for further discussion of ranking methods. + + Parameters + ---------- + a : array_like + The array of values to be ranked. + axis : {None, int}, optional + Axis along which to perform the ranking. If ``None``, the data array + is first flattened. + + Returns + ------- + ranks : ndarray + An array of size equal to the size of `a`, containing rank + scores. + """ + + if axis is not None: + a = np.asarray(a) + if a.size == 0: + # The return values of `normalize_axis_index` are ignored. The + # call validates `axis`, even though we won't use it. + # use scipy._lib._util._normalize_axis_index when available + np.core.multiarray.normalize_axis_index(axis, a.ndim) + return np.empty(a.shape, dtype=np.float64) + return np.apply_along_axis(ranktest, axis, a) + + arr = np.ravel(np.asarray(a)) + algo = "quicksort" + sorter = np.argsort(arr, kind=algo) + inv = np.empty(sorter.size, dtype=np.intp) + inv[sorter] = np.arange(sorter.size, dtype=np.intp) + + arr = arr[sorter] + obs = np.r_[True, arr[1:] != arr[:-1]] + dense = obs.cumsum()[inv] + + # cumulative counts of each unique value + count = np.r_[np.nonzero(obs)[0], len(obs)] + + # average method + return 0.5 * (count[dense] + count[dense - 1] + 1) + + +class onesamplettest: + # Code adapted from `mne-python`; ref: https://mne.tools/stable/generated/mne.stats.ttest_1samp_no_p.html + def _check_option(parameter, value, allowed_values, extra=""): + """Check the value of a parameter against a list of valid options. + + Return the value if it is valid, otherwise raise a ValueError with a + readable error message. + + Parameters + ---------- + parameter : str + The name of the parameter to check. This is used in the error message. + value : any type + The value of the parameter to check. + allowed_values : list + The list of allowed values for the parameter. + extra : str + Extra string to append to the invalid value sentence, e.g. + "when using ico mode". + + Raises + ------ + ValueError + When the value of the parameter is not one of the valid options. + + Returns + ------- + value : any type + The value if it is valid. + """ + if value in allowed_values: + return value + + # Prepare a nice error message for the user + extra = f" {extra}" if extra else extra + msg = ( + "Invalid value for the '{parameter}' parameter{extra}. " + "{options}, but got {value!r} instead." + ) + allowed_values = list(allowed_values) # e.g., if a dict was given + if len(allowed_values) == 1: + options = f"The only allowed value is {repr(allowed_values[0])}" + else: + options = "Allowed values are " + if len(allowed_values) == 2: + options += " and ".join(repr(v) for v in allowed_values) + else: + options += ", ".join(repr(v) for v in allowed_values[:-1]) + options += f", and {repr(allowed_values[-1])}" + raise ValueError( + msg.format(parameter=parameter, options=options, value=value, extra=extra) + ) + + def _ttest_1samp_no_p(X, axis=0, sigma=0, method="relative"): + """Perform one-sample t-test. + + This is a modified version of :func:`scipy.stats.ttest_1samp` that avoids + a (relatively) time-consuming p-value calculation, and can adjust + for implausibly small variance values :footcite:`RidgwayEtAl2012`. + + Parameters + ---------- + X : array + Array to return t-values for. + axis: int + Axis along which to compute test. Default is 0. + sigma : float + The variance estimate will be given by ``var + sigma * max(var)`` or + ``var + sigma``, depending on "method". By default this is 0 (no + adjustment). See Notes for details. + method : str + If 'relative', the minimum variance estimate will be sigma * max(var), + if 'absolute' the minimum variance estimate will be sigma. + + Returns + ------- + t : array + T-values, potentially adjusted using the hat method. + + Notes + ----- + To use the "hat" adjustment method :footcite:`RidgwayEtAl2012`, a value + of ``sigma=1e-3`` may be a reasonable choice. + """ + onesamplettest._check_option("method", method, ["absolute", "relative"]) + var = np.var(X, axis=axis, ddof=1) + if sigma > 0: + limit = sigma * np.max(var) if method == "relative" else sigma + var += limit + return np.mean(X, axis=0) / np.sqrt(var / X.shape[0]) + + +def stats( + empirical_SDI, + surrogate_SDI, + output_dir_first_level=None, + output_dir_second_level=None, + n_perms=1000, +): + """ + Summary + ------- + Involves 3 steps: + a) + Takes in the empirical SDI to test against the surrogate SDI. + Surrogate SDI typically contains specific amount of realistic null counterpart of the empirical SDI. + (See nigsp.surrogate module for more details. https://nigsp.readthedocs.io/en/latest/api/nigsp.operations.surrogates.html) + + Create a distribution of test statistics out of the empirical and surrogate SDIs using Wilcoxon signed rank test. + This distribution shows the strength of the observed SDI compared to the surrogate SDIs + + b) + First level: Checks for the consistency of the effects over trials / epochs / events, extended for all the subjects individually + Use the test statistics from the previous step to test for the effect using parametric one-sample t-test. + (See mne.stats.ttest_1samp_no_p for more details. https://mne.tools/stable/generated/mne.stats.ttest_1samp_no_p.html) + + c) + Second level: Stat test for the effect over subjects. + Use the test statistics from the first level and perform 2nd level modeling using massive univariate (non-parametric) ttests. + and permutation-based correction for multiple comparisons. + + Parameters + ---------- + empirical_SDI: Array of shape (n_events, n_subjects, n_roi) + SDI values for the empirical data + surrogate_SDI: Array of shape (n_events, n_surrogate, n_subjects, n_roi) + SDI values for the surrogate data + output_dir_first_level: str, optional + Directory to save the test statistics from the first level + output_dir_second_level: str, optional + Directory to save the test statistics from the second level + n_perms: int, optional + Number of permutations for the second level modeling + + Returns + ------- + test_stats_second_level: Array of shape (n_events, n_roi) + Final test statistics tested for consistency across and within subjects with subsequent permutation-based + correction for multiple comparisons. It reveals the ROIs that are significantly consistent across and within subjects. + """ + if empirical_SDI.ndim > 3 or surrogate_SDI.ndim > 4: + raise ValueError( + "Please check the shape of both of the input arrays, they should be of shape (n_events, n_subjects, n_roi) and (n_events, n_surrogate, n_subjects, n_roi) respectively" + ) + + + n_events, n_surrogate, n_subjects, n_roi = np.shape(surrogate_SDI) + assert np.shape(empirical_SDI) == ( + n_events, + n_subjects, + n_roi, + ), "Mismatch with empirical SDI; please input the right empirical and surrogate SDI pair" + + # Step 1: Signed rank test + # a) get the difference between the empirical and surrogate SDIs + diff = empirical_SDI - np.moveaxis(surrogate_SDI, [0, 1, 2, 3], [1, 0, 2, 3]) + + # b) Signed Wilcoxon Test - A non-parametric test + # c) Sum the ranks + # d) Normalize it by the number of surrogates to avoid inflating the test statistics by the number of surrogates + LGR.info("Calculating test statistics using Wilcoxon signed rank test") + test_stats_signed_rank_test = ( + np.sum(ranktest(np.abs(diff), axis=0) * np.sign(diff), axis=0) / n_surrogate + ) + + # test statistics summarizing for the trials, subjects and for the ROIs + assert test_stats_signed_rank_test.shape == (n_events, n_subjects, n_roi) + + # Two-level modeling begins + # Step 2: First level Model + LGR.info("Performing First-level tests") + test_stats_first_level = onesamplettest._ttest_1samp_no_p( + test_stats_signed_rank_test + ) + + # During testing on the data, it was observed that test statistics occasionally yielded infinite values, occurring at a rate of <0.02%. + + # Occurs at the step above: This issue arises when each value in the differenced population receives a unique rank, leading to a summation + # equivalent to n(n+1)/2, where n represents the number of elements (N) in the dataset. If this unique rank assignment is consistent + # across multiple events, every event ends up having the same summed rank. Consequently, during first-level statistical calculations, + # the population effectively becomes a single value distributed across all observations. This situation results in scipy indicating that + # the test statistic is infinitely distant from 0. + + # A simple workaround is to consider them is not significant and set them to 0. + test_stats_first_level[np.where(test_stats_first_level==np.inf)] = 0 + test_stats_first_level[np.where(test_stats_first_level==-np.inf)] = 0 + + if output_dir_first_level is not None: + np.savez_compressed( + f"{output_dir_first_level}/test_stats_first_level.npz", + test_stats_first_level=test_stats_first_level, + ) + + # Step 3 : Second level Model + LGR.info("Performing Second-level tests") + test_stats_second_level, _, _ = permutation_t_test( + test_stats_first_level, n_jobs=-1, n_permutations=n_perms + ) + if output_dir_second_level is not None: + np.savez_compressed( + f"{output_dir_second_level}/test_stats_second_level.npz", + test_stats_second_level=test_stats_second_level, + ) + return test_stats_second_level + From 4d2e3430b7318096d6d08be01734293820875be8 Mon Sep 17 00:00:00 2001 From: venkateshness Date: Sat, 19 Aug 2023 20:19:25 -0400 Subject: [PATCH 02/14] docstring updates --- nigsp/operations/statistics.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/nigsp/operations/statistics.py b/nigsp/operations/statistics.py index 788688e..ac36fa3 100644 --- a/nigsp/operations/statistics.py +++ b/nigsp/operations/statistics.py @@ -115,7 +115,7 @@ def _ttest_1samp_no_p(X, axis=0, sigma=0, method="relative"): This is a modified version of :func:`scipy.stats.ttest_1samp` that avoids a (relatively) time-consuming p-value calculation, and can adjust - for implausibly small variance values :footcite:`RidgwayEtAl2012`. + for implausibly small variance values: https://doi.org/10.1016/j.neuroimage.2011.10.027. Parameters ---------- @@ -163,7 +163,7 @@ def stats( a) Takes in the empirical SDI to test against the surrogate SDI. Surrogate SDI typically contains specific amount of realistic null counterpart of the empirical SDI. - (See nigsp.surrogate module for more details. https://nigsp.readthedocs.io/en/latest/api/nigsp.operations.surrogates.html) + (See surrogate module in NiGSP for more details) Create a distribution of test statistics out of the empirical and surrogate SDIs using Wilcoxon signed rank test. This distribution shows the strength of the observed SDI compared to the surrogate SDIs @@ -175,7 +175,7 @@ def stats( c) Second level: Stat test for the effect over subjects. - Use the test statistics from the first level and perform 2nd level modeling using massive univariate (non-parametric) ttests. + Use the test statistics from the first level and perform 2nd level modeling using massive univariate ttests. and permutation-based correction for multiple comparisons. Parameters @@ -260,5 +260,4 @@ def stats( f"{output_dir_second_level}/test_stats_second_level.npz", test_stats_second_level=test_stats_second_level, ) - return test_stats_second_level - + return test_stats_second_level \ No newline at end of file From f6dd1de77b471047219c7ab73e86a92f51ea5372 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 20 Aug 2023 00:39:48 +0000 Subject: [PATCH 03/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- nigsp/operations/statistics.py | 36 ++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/nigsp/operations/statistics.py b/nigsp/operations/statistics.py index ac36fa3..c46f38e 100644 --- a/nigsp/operations/statistics.py +++ b/nigsp/operations/statistics.py @@ -1,8 +1,11 @@ +import logging + import numpy as np from mne.stats import permutation_t_test -import logging + LGR = logging.getLogger(__name__) + def ranktest(a, axis=None): # Code adapted from `scipy`; ref: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rankdata.html @@ -161,10 +164,10 @@ def stats( ------- Involves 3 steps: a) - Takes in the empirical SDI to test against the surrogate SDI. + Takes in the empirical SDI to test against the surrogate SDI. Surrogate SDI typically contains specific amount of realistic null counterpart of the empirical SDI. (See surrogate module in NiGSP for more details) - + Create a distribution of test statistics out of the empirical and surrogate SDIs using Wilcoxon signed rank test. This distribution shows the strength of the observed SDI compared to the surrogate SDIs @@ -172,11 +175,11 @@ def stats( First level: Checks for the consistency of the effects over trials / epochs / events, extended for all the subjects individually Use the test statistics from the previous step to test for the effect using parametric one-sample t-test. (See mne.stats.ttest_1samp_no_p for more details. https://mne.tools/stable/generated/mne.stats.ttest_1samp_no_p.html) - + c) Second level: Stat test for the effect over subjects. - Use the test statistics from the first level and perform 2nd level modeling using massive univariate ttests. - and permutation-based correction for multiple comparisons. + Use the test statistics from the first level and perform 2nd level modeling using massive univariate tests. + and permutation-based correction for multiple comparisons. Parameters ---------- @@ -194,14 +197,13 @@ def stats( Returns ------- test_stats_second_level: Array of shape (n_events, n_roi) - Final test statistics tested for consistency across and within subjects with subsequent permutation-based + Final test statistics tested for consistency across and within subjects with subsequent permutation-based correction for multiple comparisons. It reveals the ROIs that are significantly consistent across and within subjects. """ if empirical_SDI.ndim > 3 or surrogate_SDI.ndim > 4: raise ValueError( "Please check the shape of both of the input arrays, they should be of shape (n_events, n_subjects, n_roi) and (n_events, n_surrogate, n_subjects, n_roi) respectively" ) - n_events, n_surrogate, n_subjects, n_roi = np.shape(surrogate_SDI) assert np.shape(empirical_SDI) == ( @@ -213,7 +215,7 @@ def stats( # Step 1: Signed rank test # a) get the difference between the empirical and surrogate SDIs diff = empirical_SDI - np.moveaxis(surrogate_SDI, [0, 1, 2, 3], [1, 0, 2, 3]) - + # b) Signed Wilcoxon Test - A non-parametric test # c) Sum the ranks # d) Normalize it by the number of surrogates to avoid inflating the test statistics by the number of surrogates @@ -233,16 +235,16 @@ def stats( ) # During testing on the data, it was observed that test statistics occasionally yielded infinite values, occurring at a rate of <0.02%. - - # Occurs at the step above: This issue arises when each value in the differenced population receives a unique rank, leading to a summation - # equivalent to n(n+1)/2, where n represents the number of elements (N) in the dataset. If this unique rank assignment is consistent - # across multiple events, every event ends up having the same summed rank. Consequently, during first-level statistical calculations, - # the population effectively becomes a single value distributed across all observations. This situation results in scipy indicating that + + # Occurs at the step above: This issue arises when each value in the differenced population receives a unique rank, leading to a summation + # equivalent to n(n+1)/2, where n represents the number of elements (N) in the dataset. If this unique rank assignment is consistent + # across multiple events, every event ends up having the same summed rank. Consequently, during first-level statistical calculations, + # the population effectively becomes a single value distributed across all observations. This situation results in scipy indicating that # the test statistic is infinitely distant from 0. # A simple workaround is to consider them is not significant and set them to 0. - test_stats_first_level[np.where(test_stats_first_level==np.inf)] = 0 - test_stats_first_level[np.where(test_stats_first_level==-np.inf)] = 0 + test_stats_first_level[np.where(test_stats_first_level == np.inf)] = 0 + test_stats_first_level[np.where(test_stats_first_level == -np.inf)] = 0 if output_dir_first_level is not None: np.savez_compressed( @@ -260,4 +262,4 @@ def stats( f"{output_dir_second_level}/test_stats_second_level.npz", test_stats_second_level=test_stats_second_level, ) - return test_stats_second_level \ No newline at end of file + return test_stats_second_level From 3c5491c5ca95e97a3b5b81bedf32c6a7be26cdf2 Mon Sep 17 00:00:00 2001 From: venkateshness Date: Sun, 20 Aug 2023 12:35:19 -0400 Subject: [PATCH 04/14] Stats module, unit tests --- nigsp/__init__.py | 10 ++++++- nigsp/operations/__init__.py | 1 + nigsp/operations/statistics.py | 52 ++++++++++++++++---------------- nigsp/tests/test_statistics.py | 54 ++++++++++++++++++++++++++++++++++ setup.cfg | 3 ++ 5 files changed, 94 insertions(+), 26 deletions(-) create mode 100644 nigsp/tests/test_statistics.py diff --git a/nigsp/__init__.py b/nigsp/__init__.py index 3d64b72..0d316dc 100644 --- a/nigsp/__init__.py +++ b/nigsp/__init__.py @@ -3,7 +3,15 @@ import pkgutil from ._version import get_versions -from .operations import graph, laplacian, metrics, nifti, surrogates, timeseries +from .operations import ( + graph, + laplacian, + metrics, + nifti, + statistics, + surrogates, + timeseries, +) SKIP_MODULES = ["tests"] diff --git a/nigsp/operations/__init__.py b/nigsp/operations/__init__.py index 6835232..8ca7d4a 100644 --- a/nigsp/operations/__init__.py +++ b/nigsp/operations/__init__.py @@ -17,6 +17,7 @@ from .laplacian import decomposition, symmetric_normalised_laplacian from .metrics import functional_connectivity, gsdi, sdi from .nifti import apply_atlas, apply_mask, mat_to_vol, unfold_atlas, unmask, vol_to_mat +from .statistics import stats from .surrogates import random_sign, sc_informed, sc_uninformed, test_significance from .timeseries import ( graph_filter, diff --git a/nigsp/operations/statistics.py b/nigsp/operations/statistics.py index ac36fa3..a6a8ed1 100644 --- a/nigsp/operations/statistics.py +++ b/nigsp/operations/statistics.py @@ -1,8 +1,11 @@ +import logging + import numpy as np from mne.stats import permutation_t_test -import logging + LGR = logging.getLogger(__name__) + def ranktest(a, axis=None): # Code adapted from `scipy`; ref: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rankdata.html @@ -155,16 +158,17 @@ def stats( output_dir_first_level=None, output_dir_second_level=None, n_perms=1000, + n_jobs=-1, ): """ Summary ------- Involves 3 steps: a) - Takes in the empirical SDI to test against the surrogate SDI. + Takes in the empirical SDI to test against the surrogate SDI. Surrogate SDI typically contains specific amount of realistic null counterpart of the empirical SDI. (See surrogate module in NiGSP for more details) - + Create a distribution of test statistics out of the empirical and surrogate SDIs using Wilcoxon signed rank test. This distribution shows the strength of the observed SDI compared to the surrogate SDIs @@ -172,11 +176,11 @@ def stats( First level: Checks for the consistency of the effects over trials / epochs / events, extended for all the subjects individually Use the test statistics from the previous step to test for the effect using parametric one-sample t-test. (See mne.stats.ttest_1samp_no_p for more details. https://mne.tools/stable/generated/mne.stats.ttest_1samp_no_p.html) - + c) Second level: Stat test for the effect over subjects. - Use the test statistics from the first level and perform 2nd level modeling using massive univariate ttests. - and permutation-based correction for multiple comparisons. + Use the test statistics from the first level and perform 2nd level modeling using massive univariate tests. + and permutation-based correction for multiple comparisons. Parameters ---------- @@ -194,14 +198,13 @@ def stats( Returns ------- test_stats_second_level: Array of shape (n_events, n_roi) - Final test statistics tested for consistency across and within subjects with subsequent permutation-based + Final test statistics tested for consistency across and within subjects with subsequent permutation-based correction for multiple comparisons. It reveals the ROIs that are significantly consistent across and within subjects. """ - if empirical_SDI.ndim > 3 or surrogate_SDI.ndim > 4: + if empirical_SDI.ndim != 3 or surrogate_SDI.ndim != 4: raise ValueError( "Please check the shape of both of the input arrays, they should be of shape (n_events, n_subjects, n_roi) and (n_events, n_surrogate, n_subjects, n_roi) respectively" ) - n_events, n_surrogate, n_subjects, n_roi = np.shape(surrogate_SDI) assert np.shape(empirical_SDI) == ( @@ -211,12 +214,11 @@ def stats( ), "Mismatch with empirical SDI; please input the right empirical and surrogate SDI pair" # Step 1: Signed rank test - # a) get the difference between the empirical and surrogate SDIs + # a) Get the difference between the empirical and surrogate SDIs diff = empirical_SDI - np.moveaxis(surrogate_SDI, [0, 1, 2, 3], [1, 0, 2, 3]) - - # b) Signed Wilcoxon Test - A non-parametric test - # c) Sum the ranks - # d) Normalize it by the number of surrogates to avoid inflating the test statistics by the number of surrogates + + # b) Wilcoxon Test - A non-parametric test + # c) Normalize it by the number of surrogates to avoid inflating the test statistics by the number of surrogates LGR.info("Calculating test statistics using Wilcoxon signed rank test") test_stats_signed_rank_test = ( np.sum(ranktest(np.abs(diff), axis=0) * np.sign(diff), axis=0) / n_surrogate @@ -233,16 +235,16 @@ def stats( ) # During testing on the data, it was observed that test statistics occasionally yielded infinite values, occurring at a rate of <0.02%. - - # Occurs at the step above: This issue arises when each value in the differenced population receives a unique rank, leading to a summation - # equivalent to n(n+1)/2, where n represents the number of elements (N) in the dataset. If this unique rank assignment is consistent - # across multiple events, every event ends up having the same summed rank. Consequently, during first-level statistical calculations, - # the population effectively becomes a single value distributed across all observations. This situation results in scipy indicating that - # the test statistic is infinitely distant from 0. - # A simple workaround is to consider them is not significant and set them to 0. - test_stats_first_level[np.where(test_stats_first_level==np.inf)] = 0 - test_stats_first_level[np.where(test_stats_first_level==-np.inf)] = 0 + # Occurs at the step above: This issue arises when each value in the differenced population receives a unique rank, leading to a summation + # equivalent to n(n+1)/2, where n represents the number of events n_events. If this unique rank assignment is consistent + # across multiple events, every event ends up having the same summed rank. Consequently, during first-level statistical calculations, + # the population effectively becomes a single value distributed across all observations. This situation results in indicating that + # the test statistic is infinitely distant from null. + + # A simple workaround is to consider the observations (events) are not significant and set them to 0. + test_stats_first_level[np.where(test_stats_first_level == np.inf)] = 0 + test_stats_first_level[np.where(test_stats_first_level == -np.inf)] = 0 if output_dir_first_level is not None: np.savez_compressed( @@ -253,11 +255,11 @@ def stats( # Step 3 : Second level Model LGR.info("Performing Second-level tests") test_stats_second_level, _, _ = permutation_t_test( - test_stats_first_level, n_jobs=-1, n_permutations=n_perms + test_stats_first_level, n_jobs=n_jobs, n_permutations=n_perms ) if output_dir_second_level is not None: np.savez_compressed( f"{output_dir_second_level}/test_stats_second_level.npz", test_stats_second_level=test_stats_second_level, ) - return test_stats_second_level \ No newline at end of file + return test_stats_second_level diff --git a/nigsp/tests/test_statistics.py b/nigsp/tests/test_statistics.py new file mode 100644 index 0000000..141618c --- /dev/null +++ b/nigsp/tests/test_statistics.py @@ -0,0 +1,54 @@ +# %% +import numpy as np +from numpy.random import default_rng, rand +from pytest import raises + +import nigsp.operations as ops +from nigsp.operations.statistics import stats + + +# ### Unit tests +def test_stats(): + # Set up zero everywhere except specific roi, aka = signal on the other rois are equivalent to noise + # Tests specifically at the rois having signal + n_events = 30 + n_surrogates = 40 + n_roi = 360 + n_subs = 20 + empi_data = np.zeros((n_events, n_subs, n_roi)) + surr_data = np.zeros((n_events, n_surrogates, n_subs, n_roi)) + + rois = np.random.randint(0, n_roi, 10) + + for roi in rois: + random_empi = np.random.rand(n_events * n_subs) + random_surr = np.random.rand(n_events * n_surrogates * n_subs) + empi_data[:, :, roi] = random_empi.reshape(n_events, n_subs) + surr_data[:, :, :, roi] = random_surr.reshape(n_events, n_surrogates, n_subs) + n_perms = 200 + second_level_stats = stats(empi_data, surr_data, n_perms=n_perms) + + assert second_level_stats.shape == (n_roi,) # do we get tstats for each roi? + assert ( + second_level_stats[rois] != 0 + ).all() # do we get tstats for predefined rois? + assert np.isnan(second_level_stats).sum() == n_roi - len( + rois + ) # do we get nan for the rest of the rois? + + +# ### Break tests +# def break_test_stats(): +# # Break +# n_events = 30 +# n_surrogates = 40 +# n_roi = 360 +# n_subs = 20 +# empi_data = np.zeros(( n_subs, n_roi)) +# surr_data = np.zeros((n_events, n_subs, n_roi)) + +# # ops.statistics.stats(empi_data, surr_data, n_perms=200) + +# with raises(NotImplementedError) as errorinfo: +# ops.statistics.stats(rand(2, 3, 4, 5), rand(2, 3, 4, 5), n_perms=200) +# assert "has 4 dimensions" in str(errorinfo.value) diff --git a/setup.cfg b/setup.cfg index 4cc67d2..38cd570 100644 --- a/setup.cfg +++ b/setup.cfg @@ -43,10 +43,13 @@ nifti = viz = matplotlib>=3.1.1 nilearn>=0.7.0 +stats = + mne all = %(nifti)s %(mat)s %(viz)s + %(stats)s doc = sphinx>=2.0 sphinx-argparse From 7079b637d6a46a6d84ebe3c6fce6731ef0b6f705 Mon Sep 17 00:00:00 2001 From: venkateshness Date: Sun, 20 Aug 2023 13:16:59 -0400 Subject: [PATCH 05/14] break test --- nigsp/tests/test_statistics.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/nigsp/tests/test_statistics.py b/nigsp/tests/test_statistics.py index 141618c..ab26e6e 100644 --- a/nigsp/tests/test_statistics.py +++ b/nigsp/tests/test_statistics.py @@ -37,18 +37,9 @@ def test_stats(): ) # do we get nan for the rest of the rois? -# ### Break tests -# def break_test_stats(): -# # Break -# n_events = 30 -# n_surrogates = 40 -# n_roi = 360 -# n_subs = 20 -# empi_data = np.zeros(( n_subs, n_roi)) -# surr_data = np.zeros((n_events, n_subs, n_roi)) - -# # ops.statistics.stats(empi_data, surr_data, n_perms=200) - -# with raises(NotImplementedError) as errorinfo: -# ops.statistics.stats(rand(2, 3, 4, 5), rand(2, 3, 4, 5), n_perms=200) -# assert "has 4 dimensions" in str(errorinfo.value) +### Break tests +def break_test_stats(): + # Break + with raises(NotImplementedError) as errorinfo: + ops.statistics.stats(rand(2, 3, 4, 5), rand(2, 3, 4, 5), n_perms=200) + assert "has 1 dimension" in str(errorinfo.value) From 25af06d9acf925f09c99b48b89f5d3e374298bb8 Mon Sep 17 00:00:00 2001 From: venkateshness Date: Sun, 20 Aug 2023 14:43:20 -0400 Subject: [PATCH 06/14] dead code removed --- nigsp/operations/statistics.py | 134 +++++++++------------------------ nigsp/test.py | 0 nigsp/tests/test_statistics.py | 20 +++-- 3 files changed, 50 insertions(+), 104 deletions(-) create mode 100644 nigsp/test.py diff --git a/nigsp/operations/statistics.py b/nigsp/operations/statistics.py index 832a9ef..4d5ad93 100644 --- a/nigsp/operations/statistics.py +++ b/nigsp/operations/statistics.py @@ -35,12 +35,6 @@ def ranktest(a, axis=None): if axis is not None: a = np.asarray(a) - if a.size == 0: - # The return values of `normalize_axis_index` are ignored. The - # call validates `axis`, even though we won't use it. - # use scipy._lib._util._normalize_axis_index when available - np.core.multiarray.normalize_axis_index(axis, a.ndim) - return np.empty(a.shape, dtype=np.float64) return np.apply_along_axis(ranktest, axis, a) arr = np.ravel(np.asarray(a)) @@ -60,96 +54,42 @@ def ranktest(a, axis=None): return 0.5 * (count[dense] + count[dense - 1] + 1) -class onesamplettest: - # Code adapted from `mne-python`; ref: https://mne.tools/stable/generated/mne.stats.ttest_1samp_no_p.html - def _check_option(parameter, value, allowed_values, extra=""): - """Check the value of a parameter against a list of valid options. - - Return the value if it is valid, otherwise raise a ValueError with a - readable error message. - - Parameters - ---------- - parameter : str - The name of the parameter to check. This is used in the error message. - value : any type - The value of the parameter to check. - allowed_values : list - The list of allowed values for the parameter. - extra : str - Extra string to append to the invalid value sentence, e.g. - "when using ico mode". - - Raises - ------ - ValueError - When the value of the parameter is not one of the valid options. - - Returns - ------- - value : any type - The value if it is valid. - """ - if value in allowed_values: - return value - - # Prepare a nice error message for the user - extra = f" {extra}" if extra else extra - msg = ( - "Invalid value for the '{parameter}' parameter{extra}. " - "{options}, but got {value!r} instead." - ) - allowed_values = list(allowed_values) # e.g., if a dict was given - if len(allowed_values) == 1: - options = f"The only allowed value is {repr(allowed_values[0])}" - else: - options = "Allowed values are " - if len(allowed_values) == 2: - options += " and ".join(repr(v) for v in allowed_values) - else: - options += ", ".join(repr(v) for v in allowed_values[:-1]) - options += f", and {repr(allowed_values[-1])}" - raise ValueError( - msg.format(parameter=parameter, options=options, value=value, extra=extra) - ) +# Code adapted from `mne-python`; ref: https://mne.tools/stable/generated/mne.stats.ttest_1samp_no_p.html +def ttest_1samp_no_p(X, axis=0, sigma=0): + """Perform one-sample t-test. + + This is a modified version of :func:`scipy.stats.ttest_1samp` that avoids + a (relatively) time-consuming p-value calculation, and can adjust + for implausibly small variance values: https://doi.org/10.1016/j.neuroimage.2011.10.027. + + Parameters + ---------- + X : array + Array to return t-values for. + axis: int + Axis along which to compute test. Default is 0. + sigma : float + The variance estimate will be given by ``var + sigma * max(var)`` or + ``var + sigma``, depending on "method". By default this is 0 (no + adjustment). See Notes for details. + method : str + If 'relative', the minimum variance estimate will be sigma * max(var), + if 'absolute' the minimum variance estimate will be sigma. + + Returns + ------- + t : array + T-values, potentially adjusted using the hat method. - def _ttest_1samp_no_p(X, axis=0, sigma=0, method="relative"): - """Perform one-sample t-test. - - This is a modified version of :func:`scipy.stats.ttest_1samp` that avoids - a (relatively) time-consuming p-value calculation, and can adjust - for implausibly small variance values: https://doi.org/10.1016/j.neuroimage.2011.10.027. - - Parameters - ---------- - X : array - Array to return t-values for. - axis: int - Axis along which to compute test. Default is 0. - sigma : float - The variance estimate will be given by ``var + sigma * max(var)`` or - ``var + sigma``, depending on "method". By default this is 0 (no - adjustment). See Notes for details. - method : str - If 'relative', the minimum variance estimate will be sigma * max(var), - if 'absolute' the minimum variance estimate will be sigma. - - Returns - ------- - t : array - T-values, potentially adjusted using the hat method. - - Notes - ----- - To use the "hat" adjustment method :footcite:`RidgwayEtAl2012`, a value - of ``sigma=1e-3`` may be a reasonable choice. - """ - onesamplettest._check_option("method", method, ["absolute", "relative"]) - var = np.var(X, axis=axis, ddof=1) - if sigma > 0: - limit = sigma * np.max(var) if method == "relative" else sigma - var += limit - return np.mean(X, axis=0) / np.sqrt(var / X.shape[0]) + Notes + ----- + To use the "hat" adjustment method :footcite:`RidgwayEtAl2012`, a value + of ``sigma=1e-3`` may be a reasonable choice. + """ + var = np.var(X, axis=axis, ddof=1) + if sigma > 0: + var += sigma + return np.mean(X, axis=0) / np.sqrt(var / X.shape[0]) def stats( @@ -231,9 +171,7 @@ def stats( # Two-level modeling begins # Step 2: First level Model LGR.info("Performing First-level tests") - test_stats_first_level = onesamplettest._ttest_1samp_no_p( - test_stats_signed_rank_test - ) + test_stats_first_level = ttest_1samp_no_p(test_stats_signed_rank_test) # During testing on the data, it was observed that test statistics occasionally yielded infinite values, occurring at a rate of <0.02%. diff --git a/nigsp/test.py b/nigsp/test.py new file mode 100644 index 0000000..e69de29 diff --git a/nigsp/tests/test_statistics.py b/nigsp/tests/test_statistics.py index ab26e6e..fccafe7 100644 --- a/nigsp/tests/test_statistics.py +++ b/nigsp/tests/test_statistics.py @@ -1,9 +1,7 @@ # %% import numpy as np -from numpy.random import default_rng, rand from pytest import raises -import nigsp.operations as ops from nigsp.operations.statistics import stats @@ -25,8 +23,14 @@ def test_stats(): random_surr = np.random.rand(n_events * n_surrogates * n_subs) empi_data[:, :, roi] = random_empi.reshape(n_events, n_subs) surr_data[:, :, :, roi] = random_surr.reshape(n_events, n_surrogates, n_subs) - n_perms = 200 - second_level_stats = stats(empi_data, surr_data, n_perms=n_perms) + n_perms = 50 + second_level_stats = stats( + empi_data, + surr_data, + n_perms=n_perms, + output_dir_first_level="/tmp", + output_dir_second_level="/tmp", + ) assert second_level_stats.shape == (n_roi,) # do we get tstats for each roi? assert ( @@ -36,10 +40,14 @@ def test_stats(): rois ) # do we get nan for the rest of the rois? + # Stability test + second_level_stats_repeat = stats(empi_data, surr_data, n_perms=n_perms) + assert np.isclose(second_level_stats[rois], second_level_stats_repeat[rois]).all() + ### Break tests def break_test_stats(): # Break with raises(NotImplementedError) as errorinfo: - ops.statistics.stats(rand(2, 3, 4, 5), rand(2, 3, 4, 5), n_perms=200) - assert "has 1 dimension" in str(errorinfo.value) + ops.statistics.stats(rand(2), rand(2, 3, 4, 5), n_perms=200) + assert "check the shape of both the input" in str(errorinfo.value) From 9f6c9040e2387e068d6ac82735fe704edd589140 Mon Sep 17 00:00:00 2001 From: venkateshness Date: Sun, 20 Aug 2023 16:15:05 -0400 Subject: [PATCH 07/14] enforcing mne in readthedocs.yml --- .readthedocs.yml | 1 + nigsp/operations/statistics.py | 12 ++++-------- nigsp/tests/test_statistics.py | 4 ++-- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index bc99c2a..1450c3d 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -20,4 +20,5 @@ python: path: . extra_requirements: - doc + - stats system_packages: true diff --git a/nigsp/operations/statistics.py b/nigsp/operations/statistics.py index 4d5ad93..3669eef 100644 --- a/nigsp/operations/statistics.py +++ b/nigsp/operations/statistics.py @@ -7,7 +7,7 @@ def ranktest(a, axis=None): - # Code adapted from `scipy`; ref: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rankdata.html + # Code adapted from `scipy`; ref: https://github.com/scipy/scipy/blob/v1.11.2/scipy/stats/_stats_py.py#L10123 """Assign ranks to data, dealing with ties appropriately. @@ -16,7 +16,7 @@ def ranktest(a, axis=None): shape of the data array if desired (see Examples). Ranks begin at 1. The `method` argument controls how ranks are assigned - to equal values. See [1]_ for further discussion of ranking methods. + to equal values. Parameters ---------- @@ -60,7 +60,7 @@ def ttest_1samp_no_p(X, axis=0, sigma=0): This is a modified version of :func:`scipy.stats.ttest_1samp` that avoids a (relatively) time-consuming p-value calculation, and can adjust - for implausibly small variance values: https://doi.org/10.1016/j.neuroimage.2011.10.027. + for implausibly small variance values: RidgwayEtAl2012 - https://doi.org/10.1016/j.neuroimage.2011.10.027. Parameters ---------- @@ -72,9 +72,6 @@ def ttest_1samp_no_p(X, axis=0, sigma=0): The variance estimate will be given by ``var + sigma * max(var)`` or ``var + sigma``, depending on "method". By default this is 0 (no adjustment). See Notes for details. - method : str - If 'relative', the minimum variance estimate will be sigma * max(var), - if 'absolute' the minimum variance estimate will be sigma. Returns ------- @@ -158,8 +155,7 @@ def stats( diff = empirical_SDI - np.moveaxis(surrogate_SDI, [0, 1, 2, 3], [1, 0, 2, 3]) # b) Signed Wilcoxon Test - A non-parametric test - # c) Sum the ranks - # d) Normalize it by the number of surrogates to avoid inflating the test statistics by the number of surrogates + # c) Normalize it by the number of surrogates to avoid inflating the test statistics by the number of surrogates LGR.info("Calculating test statistics using Wilcoxon signed rank test") test_stats_signed_rank_test = ( np.sum(ranktest(np.abs(diff), axis=0) * np.sign(diff), axis=0) / n_surrogate diff --git a/nigsp/tests/test_statistics.py b/nigsp/tests/test_statistics.py index fccafe7..2d13785 100644 --- a/nigsp/tests/test_statistics.py +++ b/nigsp/tests/test_statistics.py @@ -1,5 +1,6 @@ # %% import numpy as np +from numpy.random import rand from pytest import raises from nigsp.operations.statistics import stats @@ -47,7 +48,6 @@ def test_stats(): ### Break tests def break_test_stats(): - # Break with raises(NotImplementedError) as errorinfo: - ops.statistics.stats(rand(2), rand(2, 3, 4, 5), n_perms=200) + stats(rand(2), rand(2, 3, 4, 5), n_perms=200) assert "check the shape of both the input" in str(errorinfo.value) From 1ed1b84c21b2a315d097081d8d334034f8d517dc Mon Sep 17 00:00:00 2001 From: venkateshness Date: Sun, 20 Aug 2023 16:48:01 -0400 Subject: [PATCH 08/14] polishing & final touches --- nigsp/operations/__init__.py | 2 +- nigsp/operations/statistics.py | 6 ++---- nigsp/tests/test_statistics.py | 8 +++++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/nigsp/operations/__init__.py b/nigsp/operations/__init__.py index 8ca7d4a..377e022 100644 --- a/nigsp/operations/__init__.py +++ b/nigsp/operations/__init__.py @@ -17,7 +17,7 @@ from .laplacian import decomposition, symmetric_normalised_laplacian from .metrics import functional_connectivity, gsdi, sdi from .nifti import apply_atlas, apply_mask, mat_to_vol, unfold_atlas, unmask, vol_to_mat -from .statistics import stats +from .statistics import two_level_statistical_model from .surrogates import random_sign, sc_informed, sc_uninformed, test_significance from .timeseries import ( graph_filter, diff --git a/nigsp/operations/statistics.py b/nigsp/operations/statistics.py index 3669eef..bfb2775 100644 --- a/nigsp/operations/statistics.py +++ b/nigsp/operations/statistics.py @@ -55,7 +55,7 @@ def ranktest(a, axis=None): # Code adapted from `mne-python`; ref: https://mne.tools/stable/generated/mne.stats.ttest_1samp_no_p.html -def ttest_1samp_no_p(X, axis=0, sigma=0): +def ttest_1samp_no_p(X, axis=0): """Perform one-sample t-test. This is a modified version of :func:`scipy.stats.ttest_1samp` that avoids @@ -84,12 +84,10 @@ def ttest_1samp_no_p(X, axis=0, sigma=0): of ``sigma=1e-3`` may be a reasonable choice. """ var = np.var(X, axis=axis, ddof=1) - if sigma > 0: - var += sigma return np.mean(X, axis=0) / np.sqrt(var / X.shape[0]) -def stats( +def two_level_statistical_model( empirical_SDI, surrogate_SDI, output_dir_first_level=None, diff --git a/nigsp/tests/test_statistics.py b/nigsp/tests/test_statistics.py index 2d13785..94b9502 100644 --- a/nigsp/tests/test_statistics.py +++ b/nigsp/tests/test_statistics.py @@ -3,7 +3,7 @@ from numpy.random import rand from pytest import raises -from nigsp.operations.statistics import stats +from nigsp.operations.statistics import two_level_statistical_model # ### Unit tests @@ -25,7 +25,7 @@ def test_stats(): empi_data[:, :, roi] = random_empi.reshape(n_events, n_subs) surr_data[:, :, :, roi] = random_surr.reshape(n_events, n_surrogates, n_subs) n_perms = 50 - second_level_stats = stats( + second_level_stats = two_level_statistical_model( empi_data, surr_data, n_perms=n_perms, @@ -42,7 +42,9 @@ def test_stats(): ) # do we get nan for the rest of the rois? # Stability test - second_level_stats_repeat = stats(empi_data, surr_data, n_perms=n_perms) + second_level_stats_repeat = two_level_statistical_model( + empi_data, surr_data, n_perms=200 + ) assert np.isclose(second_level_stats[rois], second_level_stats_repeat[rois]).all() From bbbcf91c17fa7036e205962b590ebbace3ef8da1 Mon Sep 17 00:00:00 2001 From: venkateshness Date: Sun, 20 Aug 2023 18:03:22 -0400 Subject: [PATCH 09/14] further polishing --- nigsp/operations/statistics.py | 27 ++++++++++++--------------- nigsp/tests/test_statistics.py | 2 +- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/nigsp/operations/statistics.py b/nigsp/operations/statistics.py index bfb2775..7a419e9 100644 --- a/nigsp/operations/statistics.py +++ b/nigsp/operations/statistics.py @@ -77,11 +77,6 @@ def ttest_1samp_no_p(X, axis=0): ------- t : array T-values, potentially adjusted using the hat method. - - Notes - ----- - To use the "hat" adjustment method :footcite:`RidgwayEtAl2012`, a value - of ``sigma=1e-3`` may be a reasonable choice. """ var = np.var(X, axis=axis, ddof=1) return np.mean(X, axis=0) / np.sqrt(var / X.shape[0]) @@ -109,12 +104,12 @@ def two_level_statistical_model( b) First level: Checks for the consistency of the effects over trials / epochs / events, extended for all the subjects individually - Use the test statistics from the previous step to test for the effect using parametric one-sample t-test. + Uses the test statistics from the previous step to test for the effect using parametric one-sample t-test. (See mne.stats.ttest_1samp_no_p for more details. https://mne.tools/stable/generated/mne.stats.ttest_1samp_no_p.html) c) Second level: Stat test for the effect over subjects. - Use the test statistics from the first level and perform 2nd level modeling using massive univariate tests. + Use the test statistics from the first level and perform 2nd level modeling using massive univariate tests and permutation-based correction for multiple comparisons. Parameters @@ -129,6 +124,8 @@ def two_level_statistical_model( Directory to save the test statistics from the second level n_perms: int, optional Number of permutations for the second level modeling + n_jobs: int, optional + Number of jobs to run in parallel for the second level modeling Returns ------- @@ -137,7 +134,7 @@ def two_level_statistical_model( correction for multiple comparisons. It reveals the ROIs that are significantly consistent across and within subjects. """ if empirical_SDI.ndim != 3 or surrogate_SDI.ndim != 4: - raise ValueError( + raise NotImplementedError( "Please check the shape of both of the input arrays, they should be of shape (n_events, n_subjects, n_roi) and (n_events, n_surrogate, n_subjects, n_roi) respectively" ) @@ -152,8 +149,8 @@ def two_level_statistical_model( # a) Get the difference between the empirical and surrogate SDIs diff = empirical_SDI - np.moveaxis(surrogate_SDI, [0, 1, 2, 3], [1, 0, 2, 3]) - # b) Signed Wilcoxon Test - A non-parametric test - # c) Normalize it by the number of surrogates to avoid inflating the test statistics by the number of surrogates + # b) Wilcoxon Test - A non-parametric test + # c) Normalization by the number of surrogates to avoid inflating the test statistics by the number of surrogates LGR.info("Calculating test statistics using Wilcoxon signed rank test") test_stats_signed_rank_test = ( np.sum(ranktest(np.abs(diff), axis=0) * np.sign(diff), axis=0) / n_surrogate @@ -167,15 +164,15 @@ def two_level_statistical_model( LGR.info("Performing First-level tests") test_stats_first_level = ttest_1samp_no_p(test_stats_signed_rank_test) - # During testing on the data, it was observed that test statistics occasionally yielded infinite values, occurring at a rate of <0.02%. + # During testing on a real data, it was observed that test statistics sporadically yielded infinite values at a rate of <0.02%. # Occurs at the step above: This issue arises when each value in the differenced population receives a unique rank, leading to a summation - # equivalent to n(n+1)/2, where n represents the number of elements (N) in the dataset. If this unique rank assignment is consistent + # equivalent to n(n+1)/2, where n represents the number of events. If this unique rank assignment is consistent # across multiple events, every event ends up having the same summed rank. Consequently, during first-level statistical calculations, - # the population effectively becomes a single value distributed across all observations. This situation results in scipy indicating that - # the test statistic is infinitely distant from 0. + # the population effectively becomes a single value distributed across all observations. This situation results in + # the test statistic being infinitely distant from 0. - # A simple workaround is to consider them is not significant and set them to 0. + # A simple workaround is to consider them as not significant and set them to 0. test_stats_first_level[np.where(test_stats_first_level == np.inf)] = 0 test_stats_first_level[np.where(test_stats_first_level == -np.inf)] = 0 diff --git a/nigsp/tests/test_statistics.py b/nigsp/tests/test_statistics.py index 94b9502..a4547ca 100644 --- a/nigsp/tests/test_statistics.py +++ b/nigsp/tests/test_statistics.py @@ -51,5 +51,5 @@ def test_stats(): ### Break tests def break_test_stats(): with raises(NotImplementedError) as errorinfo: - stats(rand(2), rand(2, 3, 4, 5), n_perms=200) + two_level_statistical_model(rand(2), rand(2, 3, 4, 5), n_perms=200) assert "check the shape of both the input" in str(errorinfo.value) From 7e911eb3bc22307c52cd61857c658e780021f872 Mon Sep 17 00:00:00 2001 From: venkateshness Date: Sun, 20 Aug 2023 18:18:39 -0400 Subject: [PATCH 10/14] possible fix --- nigsp/tests/test_statistics.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nigsp/tests/test_statistics.py b/nigsp/tests/test_statistics.py index a4547ca..d932936 100644 --- a/nigsp/tests/test_statistics.py +++ b/nigsp/tests/test_statistics.py @@ -7,7 +7,7 @@ # ### Unit tests -def test_stats(): +def test_two_level_statistical_model(): # Set up zero everywhere except specific roi, aka = signal on the other rois are equivalent to noise # Tests specifically at the rois having signal n_events = 30 @@ -43,13 +43,13 @@ def test_stats(): # Stability test second_level_stats_repeat = two_level_statistical_model( - empi_data, surr_data, n_perms=200 + empi_data, surr_data, n_perms=n_perms ) assert np.isclose(second_level_stats[rois], second_level_stats_repeat[rois]).all() ### Break tests -def break_test_stats(): +def break_test_two_level_statistical_model(): with raises(NotImplementedError) as errorinfo: two_level_statistical_model(rand(2), rand(2, 3, 4, 5), n_perms=200) assert "check the shape of both the input" in str(errorinfo.value) From 473a1ff8a7f06a8177f8bdeec83b6b89f98c701d Mon Sep 17 00:00:00 2001 From: venkateshness Date: Sun, 20 Aug 2023 18:29:11 -0400 Subject: [PATCH 11/14] removing test.py --- nigsp/test.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 nigsp/test.py diff --git a/nigsp/test.py b/nigsp/test.py deleted file mode 100644 index e69de29..0000000 From 6d7bcd30790d875a3b5d03837fe7e2b2f9f77c3c Mon Sep 17 00:00:00 2001 From: venkateshness Date: Sun, 20 Aug 2023 23:26:24 -0400 Subject: [PATCH 12/14] docstring - correction --- nigsp/operations/statistics.py | 2 +- nigsp/tests/test_statistics.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/nigsp/operations/statistics.py b/nigsp/operations/statistics.py index 7a419e9..8a45b27 100644 --- a/nigsp/operations/statistics.py +++ b/nigsp/operations/statistics.py @@ -167,7 +167,7 @@ def two_level_statistical_model( # During testing on a real data, it was observed that test statistics sporadically yielded infinite values at a rate of <0.02%. # Occurs at the step above: This issue arises when each value in the differenced population receives a unique rank, leading to a summation - # equivalent to n(n+1)/2, where n represents the number of events. If this unique rank assignment is consistent + # equivalent to n(n+1)/2, where n represents the number of surrogates. If this unique rank assignment is consistent # across multiple events, every event ends up having the same summed rank. Consequently, during first-level statistical calculations, # the population effectively becomes a single value distributed across all observations. This situation results in # the test statistic being infinitely distant from 0. diff --git a/nigsp/tests/test_statistics.py b/nigsp/tests/test_statistics.py index d932936..3f87eed 100644 --- a/nigsp/tests/test_statistics.py +++ b/nigsp/tests/test_statistics.py @@ -1,4 +1,3 @@ -# %% import numpy as np from numpy.random import rand from pytest import raises @@ -52,4 +51,4 @@ def test_two_level_statistical_model(): def break_test_two_level_statistical_model(): with raises(NotImplementedError) as errorinfo: two_level_statistical_model(rand(2), rand(2, 3, 4, 5), n_perms=200) - assert "check the shape of both the input" in str(errorinfo.value) + assert "Please check the shape" in str(errorinfo.value) From bc0559128dfe6babe3de72f879bf97926f30aae5 Mon Sep 17 00:00:00 2001 From: venkateshness Date: Mon, 21 Aug 2023 00:18:06 -0400 Subject: [PATCH 13/14] fix - random ints wo replacement --- nigsp/operations/statistics.py | 2 +- nigsp/tests/test_statistics.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/nigsp/operations/statistics.py b/nigsp/operations/statistics.py index 7a419e9..8a45b27 100644 --- a/nigsp/operations/statistics.py +++ b/nigsp/operations/statistics.py @@ -167,7 +167,7 @@ def two_level_statistical_model( # During testing on a real data, it was observed that test statistics sporadically yielded infinite values at a rate of <0.02%. # Occurs at the step above: This issue arises when each value in the differenced population receives a unique rank, leading to a summation - # equivalent to n(n+1)/2, where n represents the number of events. If this unique rank assignment is consistent + # equivalent to n(n+1)/2, where n represents the number of surrogates. If this unique rank assignment is consistent # across multiple events, every event ends up having the same summed rank. Consequently, during first-level statistical calculations, # the population effectively becomes a single value distributed across all observations. This situation results in # the test statistic being infinitely distant from 0. diff --git a/nigsp/tests/test_statistics.py b/nigsp/tests/test_statistics.py index d932936..b0a252f 100644 --- a/nigsp/tests/test_statistics.py +++ b/nigsp/tests/test_statistics.py @@ -1,4 +1,3 @@ -# %% import numpy as np from numpy.random import rand from pytest import raises @@ -17,7 +16,7 @@ def test_two_level_statistical_model(): empi_data = np.zeros((n_events, n_subs, n_roi)) surr_data = np.zeros((n_events, n_surrogates, n_subs, n_roi)) - rois = np.random.randint(0, n_roi, 10) + rois = np.random.choice(360, size=10, replace=False) # no replacement for roi in rois: random_empi = np.random.rand(n_events * n_subs) @@ -52,4 +51,4 @@ def test_two_level_statistical_model(): def break_test_two_level_statistical_model(): with raises(NotImplementedError) as errorinfo: two_level_statistical_model(rand(2), rand(2, 3, 4, 5), n_perms=200) - assert "check the shape of both the input" in str(errorinfo.value) + assert "Please check the shape" in str(errorinfo.value) From 6fcd1577d77585322da080a80db8afe6f56351f3 Mon Sep 17 00:00:00 2001 From: venkateshness Date: Mon, 21 Aug 2023 19:45:18 -0400 Subject: [PATCH 14/14] round 1 --- nigsp/operations/statistics.py | 74 +++++++++++++++++++++++++++++++--- nigsp/tests/test_statistics.py | 11 ++++- 2 files changed, 79 insertions(+), 6 deletions(-) diff --git a/nigsp/operations/statistics.py b/nigsp/operations/statistics.py index 8a45b27..3c06023 100644 --- a/nigsp/operations/statistics.py +++ b/nigsp/operations/statistics.py @@ -1,7 +1,6 @@ import logging import numpy as np -from mne.stats import permutation_t_test LGR = logging.getLogger(__name__) @@ -31,6 +30,26 @@ def ranktest(a, axis=None): ranks : ndarray An array of size equal to the size of `a`, containing rank scores. + + See Also + -------- + scipy.stats.rankdata + + + Notes + ---------- + Borrowed from [scipy](https://github.com/scipy/scipy/blob/v1.11.2/scipy/stats/_stats_py.py#L10123) + Copyright (c) 2001-2002 Enthought, Inc. 2003-2023, SciPy Developers. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Re-distributing the source code with the BSD 3-Clause License """ if axis is not None: @@ -58,9 +77,8 @@ def ranktest(a, axis=None): def ttest_1samp_no_p(X, axis=0): """Perform one-sample t-test. - This is a modified version of :func:`scipy.stats.ttest_1samp` that avoids - a (relatively) time-consuming p-value calculation, and can adjust - for implausibly small variance values: RidgwayEtAl2012 - https://doi.org/10.1016/j.neuroimage.2011.10.027. + This function avoids a (relatively) time-consuming p-value calculation, + and can adjust for implausibly small variance values: Ridgway et al. 2012 [1] Parameters ---------- @@ -77,6 +95,44 @@ def ttest_1samp_no_p(X, axis=0): ------- t : array T-values, potentially adjusted using the hat method. + + References + ---------- + .. [1] Ridgway G et al 2012 https://doi.org/10.1016/j.neuroimage.2011.10.027. + + See Also + -------- + mne.stats.ttest_1samp_no_p + scipy.stats.ttest_1samp + + Notes + ----- + Borrowed from [mne-python](https://mne.tools/stable/generated/mne.stats.ttest_1samp_no_p.html) + Copyright (c) 2011-2022, authors of MNE-Python + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Re-distributing the source code with the BSD 3-Clause License + + + `mne.stats.ttest_1samp_no_p` was originally borrowed from [Scipy](https://github.com/scipy/scipy/blob/v1.11.1/scipy/stats/_stats_py.py#L6763-L6943) + Copyright (c) 2001-2002 Enthought, Inc. 2003-2023, SciPy Developers. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Re-distributing the source code with the BSD 3-Clause License """ var = np.var(X, axis=axis, ddof=1) return np.mean(X, axis=0) / np.sqrt(var / X.shape[0]) @@ -110,7 +166,7 @@ def two_level_statistical_model( c) Second level: Stat test for the effect over subjects. Use the test statistics from the first level and perform 2nd level modeling using massive univariate tests - and permutation-based correction for multiple comparisons. + and permutation-based correction for multiple comparisons. Ref: https://doi.org/10.1002/hbm.1058 / Parameters ---------- @@ -133,6 +189,14 @@ def two_level_statistical_model( Final test statistics tested for consistency across and within subjects with subsequent permutation-based correction for multiple comparisons. It reveals the ROIs that are significantly consistent across and within subjects. """ + try: + from mne.stats import permutation_t_test + except ImportError: + raise ImportError( + "MNE-python is required to run this function", + "Please see installation instructions", + ) + if empirical_SDI.ndim != 3 or surrogate_SDI.ndim != 4: raise NotImplementedError( "Please check the shape of both of the input arrays, they should be of shape (n_events, n_subjects, n_roi) and (n_events, n_surrogate, n_subjects, n_roi) respectively" diff --git a/nigsp/tests/test_statistics.py b/nigsp/tests/test_statistics.py index b0a252f..bfdea7c 100644 --- a/nigsp/tests/test_statistics.py +++ b/nigsp/tests/test_statistics.py @@ -1,3 +1,6 @@ +import sys + +import mne.stats import numpy as np from numpy.random import rand from pytest import raises @@ -48,7 +51,13 @@ def test_two_level_statistical_model(): ### Break tests -def break_test_two_level_statistical_model(): +def test_break_two_level_statistical_model(): with raises(NotImplementedError) as errorinfo: two_level_statistical_model(rand(2), rand(2, 3, 4, 5), n_perms=200) assert "Please check the shape" in str(errorinfo.value) + + sys.modules["mne.stats"] = None + with raises(ImportError) as errorinfo: + two_level_statistical_model(rand(2), rand(2, 3, 4, 5), n_perms=200) + assert "required" in str(errorinfo.value) + sys.modules["mne"] = mne.stats