Statistical Model for SDI - first level and second level #73

Open · wants to merge 16 commits into base: master
Changes from 15 commits
1 change: 1 addition & 0 deletions .readthedocs.yml
@@ -20,4 +20,5 @@ python:
path: .
extra_requirements:
- doc
- stats
system_packages: true
10 changes: 9 additions & 1 deletion nigsp/__init__.py
@@ -3,7 +3,15 @@
import pkgutil

from ._version import get_versions
from .operations import graph, laplacian, metrics, nifti, surrogates, timeseries
from .operations import (
graph,
laplacian,
metrics,
nifti,
statistics,
surrogates,
timeseries,
)

SKIP_MODULES = ["tests"]

1 change: 1 addition & 0 deletions nigsp/operations/__init__.py
@@ -17,6 +17,7 @@
from .laplacian import decomposition, symmetric_normalised_laplacian
from .metrics import functional_connectivity, gsdi, sdi
from .nifti import apply_atlas, apply_mask, mat_to_vol, unfold_atlas, unmask, vol_to_mat
from .statistics import two_level_statistical_model
from .surrogates import random_sign, sc_informed, sc_uninformed, test_significance
from .timeseries import (
graph_filter,
195 changes: 195 additions & 0 deletions nigsp/operations/statistics.py
@@ -0,0 +1,195 @@
import logging

import numpy as np
from mne.stats import permutation_t_test

LGR = logging.getLogger(__name__)


def ranktest(a, axis=None):
# Code adapted from `scipy`; ref: https://github.com/scipy/scipy/blob/v1.11.2/scipy/stats/_stats_py.py#L10123

"""Assign ranks to data, dealing with ties appropriately.

By default (``axis=None``), the data array is first flattened, and a flat
array of ranks is returned. Separately reshape the rank array to the
shape of the data array if desired.

Ranks begin at 1. Ties are assigned the average of the ranks that would
have been given to the tied values (the "average" method of
``scipy.stats.rankdata``).

Parameters
----------
a : array_like
The array of values to be ranked.
axis : {None, int}, optional
Axis along which to perform the ranking. If ``None``, the data array
is first flattened.

Returns
-------
ranks : ndarray
An array of size equal to the size of `a`, containing rank
scores.
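
Examples
--------
A minimal sketch of the tie handling (example values are illustrative,
not part of the adapted scipy code):

>>> ranktest([1, 2, 2, 3])
array([1. , 2.5, 2.5, 4. ])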
"""

if axis is not None:
a = np.asarray(a)
return np.apply_along_axis(ranktest, axis, a)

arr = np.ravel(np.asarray(a))
algo = "quicksort"
sorter = np.argsort(arr, kind=algo)
inv = np.empty(sorter.size, dtype=np.intp)
inv[sorter] = np.arange(sorter.size, dtype=np.intp)

arr = arr[sorter]
obs = np.r_[True, arr[1:] != arr[:-1]]
dense = obs.cumsum()[inv]

# cumulative counts of each unique value
count = np.r_[np.nonzero(obs)[0], len(obs)]

# average method
return 0.5 * (count[dense] + count[dense - 1] + 1)


# Code adapted from `mne-python`; ref: https://mne.tools/stable/generated/mne.stats.ttest_1samp_no_p.html
def ttest_1samp_no_p(X, axis=0):
"""Perform one-sample t-test.

This is a simplified version of :func:`scipy.stats.ttest_1samp` that skips
the (relatively) time-consuming p-value calculation. Unlike
:func:`mne.stats.ttest_1samp_no_p`, it does not apply the variance ("hat")
adjustment for implausibly small variance values (Ridgway et al., 2012,
https://doi.org/10.1016/j.neuroimage.2011.10.027).

Parameters
----------
X : array
Array to return t-values for.
axis : int, optional
Axis along which to compute the test. Default is 0.

Returns
-------
t : array
T-values.
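
Examples
--------
A minimal sketch (illustrative values): the statistic is
``mean(X) / sqrt(var(X) / n)`` along the chosen axis.

>>> ttest_1samp_no_p(np.array([[1.0], [2.0], [3.0]]))
array([3.46410162])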
"""
var = np.var(X, axis=axis, ddof=1)
return np.mean(X, axis=axis) / np.sqrt(var / X.shape[axis])


def two_level_statistical_model(
empirical_SDI,
surrogate_SDI,
output_dir_first_level=None,
output_dir_second_level=None,
n_perms=1000,
n_jobs=-1,
):
"""
Summary
-------
Involves three steps:
a)
Takes the empirical SDI and tests it against the surrogate SDI.
The surrogate SDI typically contains a set of realistic null counterparts of the empirical SDI
(see the surrogates module in NiGSP for more details).

Builds a distribution of test statistics from the empirical and surrogate SDIs using the Wilcoxon signed-rank test.
This distribution reflects the strength of the observed SDI relative to the surrogate SDIs.

b)
First level: tests the consistency of the effect over trials / epochs / events for each subject individually.
Uses the test statistics from the previous step in a parametric one-sample t-test
(see mne.stats.ttest_1samp_no_p for more details: https://mne.tools/stable/generated/mne.stats.ttest_1samp_no_p.html).

c)
Second level: tests the effect over subjects.
Uses the test statistics from the first level to perform second-level modeling with massive univariate tests
and permutation-based correction for multiple comparisons.

Parameters
----------
empirical_SDI: Array of shape (n_events, n_subjects, n_roi)
SDI values for the empirical data
surrogate_SDI: Array of shape (n_events, n_surrogate, n_subjects, n_roi)
SDI values for the surrogate data
output_dir_first_level: str, optional
Directory to save the test statistics from the first level
output_dir_second_level: str, optional
Directory to save the test statistics from the second level
n_perms: int, optional
Number of permutations for the second level modeling
n_jobs: int, optional
Number of jobs to run in parallel for the second level modeling

Returns
-------
test_stats_second_level: Array of shape (n_roi,)
Final test statistics, tested for consistency within subjects (first level) and across subjects (second level),
with permutation-based correction for multiple comparisons. It reveals the ROIs whose effect is significantly
consistent both within and across subjects.
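
Examples
--------
A minimal call sketch (array shapes and random data are illustrative only):

>>> emp = np.random.rand(10, 8, 50)        # (n_events, n_subjects, n_roi)
>>> surr = np.random.rand(10, 100, 8, 50)  # (n_events, n_surrogate, n_subjects, n_roi)
>>> stats = two_level_statistical_model(emp, surr, n_perms=100)
>>> stats.shape
(50,)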
"""
if empirical_SDI.ndim != 3 or surrogate_SDI.ndim != 4:
raise NotImplementedError(
"Please check the shape of both of the input arrays, they should be of shape (n_events, n_subjects, n_roi) and (n_events, n_surrogate, n_subjects, n_roi) respectively"
)

n_events, n_surrogate, n_subjects, n_roi = np.shape(surrogate_SDI)
assert np.shape(empirical_SDI) == (
n_events,
n_subjects,
n_roi,
), "Mismatch with empirical SDI; please input the right empirical and surrogate SDI pair"

# Step 1: Signed rank test
# a) Get the difference between the empirical and surrogate SDIs
diff = empirical_SDI - np.moveaxis(surrogate_SDI, [0, 1, 2, 3], [1, 0, 2, 3])
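# diff has shape (n_surrogate, n_events, n_subjects, n_roi): the surrogate axis is moved to the front
# so that every surrogate SDI is subtracted from the matching empirical SDI by broadcasting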

# b) Wilcoxon signed-rank test - a non-parametric test
# c) Normalize by the number of surrogates so the test statistics are not inflated by the surrogate count
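# For each (event, subject, ROI), the statistic is the sum over surrogates of rank(|diff|) * sign(diff),
# divided by n_surrogate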
LGR.info("Calculating test statistics using Wilcoxon signed rank test")
test_stats_signed_rank_test = (
np.sum(ranktest(np.abs(diff), axis=0) * np.sign(diff), axis=0) / n_surrogate
)

# one test statistic per (event, subject, ROI), summarizing over the surrogates
assert test_stats_signed_rank_test.shape == (n_events, n_subjects, n_roi)

# Two-level modeling begins
# Step 2: First level Model
LGR.info("Performing First-level tests")
test_stats_first_level = ttest_1samp_no_p(test_stats_signed_rank_test)
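# The one-sample t-test collapses the event axis: test_stats_first_level has shape (n_subjects, n_roi)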

# During testing on real data, the first-level test statistics sporadically yielded infinite values (at a rate of < 0.02%).

# This occurs at the step above: when every value in the differenced population receives a unique rank, the ranks sum
# to n(n+1)/2, where n is the number of surrogates. If this unique rank assignment is consistent across events, every
# event ends up with the same summed rank, so at the first level the population effectively collapses to a single value
# repeated across all observations, and the resulting test statistic is infinitely far from 0.

# A simple workaround is to consider them as not significant and set them to 0.
test_stats_first_level[np.isinf(test_stats_first_level)] = 0

if output_dir_first_level is not None:
np.savez_compressed(
f"{output_dir_first_level}/test_stats_first_level.npz",
test_stats_first_level=test_stats_first_level,
)

# Step 3 : Second level Model
LGR.info("Performing Second-level tests")
test_stats_second_level, _, _ = permutation_t_test(
test_stats_first_level, n_jobs=n_jobs, n_permutations=n_perms
)
if output_dir_second_level is not None:
np.savez_compressed(
f"{output_dir_second_level}/test_stats_second_level.npz",
test_stats_second_level=test_stats_second_level,
)
return test_stats_second_level
54 changes: 54 additions & 0 deletions nigsp/tests/test_statistics.py
@@ -0,0 +1,54 @@
import numpy as np
from numpy.random import rand
from pytest import raises

from nigsp.operations.statistics import two_level_statistical_model


# ### Unit tests
def test_two_level_statistical_model():
# Set up data that is zero everywhere except in a few selected ROIs, i.e. only those ROIs carry signal
# The assertions below specifically check the ROIs that carry signal
n_events = 30
n_surrogates = 40
n_roi = 360
n_subs = 20
empi_data = np.zeros((n_events, n_subs, n_roi))
surr_data = np.zeros((n_events, n_surrogates, n_subs, n_roi))

rois = np.random.choice(n_roi, size=10, replace=False)  # sample 10 ROIs without replacement

for roi in rois:
random_empi = np.random.rand(n_events * n_subs)
random_surr = np.random.rand(n_events * n_surrogates * n_subs)
empi_data[:, :, roi] = random_empi.reshape(n_events, n_subs)
surr_data[:, :, :, roi] = random_surr.reshape(n_events, n_surrogates, n_subs)
n_perms = 50
second_level_stats = two_level_statistical_model(
empi_data,
surr_data,
n_perms=n_perms,
output_dir_first_level="/tmp",
output_dir_second_level="/tmp",
)

assert second_level_stats.shape == (n_roi,) # do we get tstats for each roi?
assert (
second_level_stats[rois] != 0
).all() # do we get tstats for predefined rois?
assert np.isnan(second_level_stats).sum() == n_roi - len(
rois
) # do we get nan for the rest of the rois?

# Stability test
second_level_stats_repeat = two_level_statistical_model(
empi_data, surr_data, n_perms=n_perms
)
assert np.isclose(second_level_stats[rois], second_level_stats_repeat[rois]).all()


### Break tests
def test_break_two_level_statistical_model():
with raises(NotImplementedError) as errorinfo:
two_level_statistical_model(rand(2), rand(2, 3, 4, 5), n_perms=200)
assert "Please check the shape" in str(errorinfo.value)
3 changes: 3 additions & 0 deletions setup.cfg
@@ -43,10 +43,13 @@ nifti =
viz =
matplotlib>=3.1.1
nilearn>=0.7.0
stats =
mne
all =
%(nifti)s
%(mat)s
%(viz)s
%(stats)s
doc =
sphinx>=2.0
sphinx-argparse