Permutation_test (1/4): add comments and docstring of the functions #111

Open · wants to merge 27 commits into base: main

Changes from 23 commits (27 commits total)
3 changes: 2 additions & 1 deletion doc_conf/api.rst
@@ -26,7 +26,8 @@ Functions
knockoff_aggregation
model_x_knockoff
multivariate_1D_simulation
permutation_test_cv
permutation_test
permutation_test_pval
reid
standardized_svr
zscore_from_pval
21 changes: 20 additions & 1 deletion doc_conf/references.bib
@@ -177,4 +177,23 @@ @article{liuFastPowerfulConditional2021
archiveprefix = {arxiv},
keywords = {Statistics - Methodology},
file = {/home/ahmad/Zotero/storage/8HRQZX3H/Liu et al. - 2021 - Fast and Powerful Conditional Randomization Testin.pdf;/home/ahmad/Zotero/storage/YFNDKN2B/2006.html}
}
}

@book{westfall1993resampling,
title={Resampling-based multiple testing: Examples and methods for p-value adjustment},
author={Westfall, Peter H and Young, S Stanley},
volume={279},
year={1993},
publisher={John Wiley \& Sons}
}

@article{hirschhorn2005genome,
title={Genome-wide association studies for common diseases and complex traits},
author={Hirschhorn, Joel N and Daly, Mark J},
journal={Nature reviews genetics},
volume={6},
number={2},
pages={95--108},
year={2005},
publisher={Nature Publishing Group UK London}
}
14 changes: 10 additions & 4 deletions examples/plot_fmri_data_example.py
@@ -52,12 +52,13 @@
from sklearn.cluster import FeatureAgglomeration
from sklearn.feature_extraction import image
from sklearn.linear_model import Ridge
from sklearn.svm import LinearSVR
from sklearn.utils import Bunch

from hidimstat.adaptive_permutation_threshold import ada_svr
from hidimstat.clustered_inference import clustered_inference
from hidimstat.ensemble_clustered_inference import ensemble_clustered_inference
from hidimstat.permutation_test import permutation_test, permutation_test_cv
from hidimstat.permutation_test import permutation_test, permutation_test_pval
from hidimstat.standardized_svr import standardized_svr
from hidimstat.stat_tools import pval_from_scale, zscore_from_pval

@@ -152,18 +153,23 @@ def preprocess_haxby(subject=2, memory=None):
SVR_permutation_test_inference = False
if SVR_permutation_test_inference:
# We computed the regularization parameter by CV (C = 0.1)
pval_corr_svr_perm_test, one_minus_pval_corr_svr_perm_test = permutation_test_cv(
X, y, n_permutations=50, C=0.1
estimator = LinearSVR(C=0.1)
Collaborator
C is the regularization parameter; it is not optimized via CV here. Or am I missing something?
I suggest using something like a randomized search.

Suggested change
estimator = LinearSVR(C=0.1)
estimator = RandomizedSearchCV(
LinearSVR(random_state=42),
param_distributions={ "C": np.logspace(-3, 3, 10), },
n_iter=10,
n_jobs=5,
random_state=42,
)

Collaborator Author
I didn't add CV optimisation because the original example didn't use it.
Nevertheless, we need to be careful with optimisation because it will increase the running time of the examples.

Contributor
Do you have an estimate of the compute time?

Collaborator Author
On my computer, the example takes 5m39s without CV and 7m16s with CV, so the CV adds roughly 2 minutes of computation.

One solution could be to store the best value of the parameter and avoid refitting it each time.
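The caching idea above can be sketched as follows. This is a minimal, hypothetical example on synthetic sklearn data, not the actual hidimstat example: run the hyper-parameter search once, store the selected C, and reuse it for later fits instead of re-running the search inside each permutation.

```python
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import LinearSVR

X, y = make_regression(n_samples=100, n_features=20, random_state=0)

# Run the hyper-parameter search once...
search = RandomizedSearchCV(
    LinearSVR(max_iter=10000, random_state=42),
    param_distributions={"C": np.logspace(-3, 3, 10)},
    n_iter=10,
    random_state=42,
)
search.fit(X, y)
best_C = search.best_params_["C"]

# ...then reuse the stored value for every subsequent fit,
# avoiding the cost of refitting the search each time.
estimator = LinearSVR(C=best_C, max_iter=10000, random_state=42)
estimator.fit(X, y)
```

The search cost is paid once up front; each permutation then only pays for a single LinearSVR fit.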

Collaborator
Also, L153 sets SVR_permutation_test_inference = False, so is this even running in the CI?
If not, I suggest keeping the CV in the example to show this possibility to the user without increasing CI time.

Collaborator Author
I set SVR_permutation_test_inference = True for my test.

Including the SVR_permutation_test_inference option in my computation times, I get:
SVR_permutation_test_inference = True and CV: 7m16s.
SVR_permutation_test_inference = True and no CV: 5m39s.
SVR_permutation_test_inference = False and CV: 1m57s.
SVR_permutation_test_inference = False and no CV: 1m48s.

Contributor
My feeling is that this is too much time for a method that does not enjoy any theoretical guarantee.
Out of curiosity, what do you get if you replace the SVR with a RidgeRegression?

Contributor
For the time being, we should not change this if there is no explicit reason to (e.g., a significant reduction in documentation generation time).

Collaborator Author
My feeling is that this is too much time for a method that does not enjoy any theoretical guarantee. Out of curiosity, what do you get if you replace the SVR with a RidgeRegression?

This is already done in the example.

weight_svr, weight_svr_distribution = permutation_test(X, y, n_permutations=50)
pval_corr_svr_perm_test, one_minus_pval_corr_svr_perm_test = permutation_test_pval(
weight_svr, weight_svr_distribution
)

# Another method is to compute the p-values by permutation test from the
# Ridge decoder. The solution provided by this method should be very close to
# the previous one and the computation time is much shorter: around 20 seconds.

estimator = Ridge()
lionelkusch marked this conversation as resolved.
pval_corr_ridge_perm_test, one_minus_pval_corr_ridge_perm_test = permutation_test(
weight_ridge, weight_ridge_distribution = permutation_test(
X, y, estimator=estimator, n_permutations=200
)
pval_corr_ridge_perm_test, one_minus_pval_corr_ridge_perm_test = permutation_test_pval(
weight_ridge, weight_ridge_distribution
)

#############################################################################
# Now, let us run the algorithm introduced by Gaonkar et al. (c.f. References).
6 changes: 5 additions & 1 deletion pyproject.toml
@@ -80,4 +80,8 @@ where = ["src"]


[tool.hatch.version]
source = "vcs"
source = "vcs"

#pyproject.toml
[tool.pytest.ini_options]
addopts = "--ignore=src" # ignore src directory
5 changes: 3 additions & 2 deletions src/hidimstat/__init__.py
@@ -7,7 +7,7 @@
from .knockoffs import model_x_knockoff
from .multi_sample_split import aggregate_quantiles
from .noise_std import group_reid, reid
from .permutation_test import permutation_test_cv
from .permutation_test import permutation_test, permutation_test_pval
from .scenario import multivariate_1D_simulation
from .standardized_svr import standardized_svr
from .stat_tools import zscore_from_pval
@@ -34,7 +34,8 @@
"knockoff_aggregation",
"model_x_knockoff",
"multivariate_1D_simulation",
"permutation_test_cv",
"permutation_test",
"permutation_test_pval",
"reid",
"standardized_svr",
"zscore_from_pval",
221 changes: 79 additions & 142 deletions src/hidimstat/permutation_test.py
@@ -1,25 +1,18 @@
import numpy as np
from joblib import Parallel, delayed
from sklearn.base import clone
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVR
from sklearn.utils import _safe_indexing

from hidimstat.stat_tools import pval_from_two_sided_pval_and_sign


def permutation_test_cv(
X,
y,
n_permutations=1000,
C=None,
Cs=np.logspace(-7, 1, 9),
lionelkusch marked this conversation as resolved.
seed=0,
n_jobs=1,
verbose=1,

from hidimstat.stat_tools import pval_from_two_sided_pval_and_sign, step_down_max_t


def permutation_test(
X, y, estimator, n_permutations=1000, seed=0, n_jobs=None, verbose=0
):
"""Cross-validated permutation test shuffling the target
"""
Permutation test

This function compute the distribution of the weights of a linear model
by shuffling the target :footcite:t:`hirschhorn2005genome`.

Parameters
----------
@@ -29,16 +29,8 @@ def permutation_test_cv(
y : ndarray, shape (n_samples,)
Target.

C : float or None, optional (default=None)
If None, the linear SVR regularization parameter is set by cross-val
running a grid search on the list of hyper-parameters contained in Cs.
Otherwise, the regularization parameter is equal to C.
The strength of the regularization is inversely proportional to C.

Cs : ndarray, optional (default=np.logspace(-7, 1, 9))
If C is None, the linear SVR regularization parameter is set by
cross-val running a grid search on the list of hyper-parameters
contained in Cs.
estimator : object LinearModel
The linear model used to fit the data.

n_permutations : int, optional (default=1000)
Number of permutations used to compute the survival function
@@ -47,77 +47,58 @@
seed : int, optional (default=0)
Determines the permutations used for shuffling the target

n_jobs : int or None, optional (default=1)
n_jobs : int or None, optional (default=None)
Number of CPUs to use during the cross validation.

verbose: int, optional (default=1)
The verbosity level: if non zero, progress messages are printed
when computing the permutation stats in parralel.
The frequency of the messages increases with the verbosity level.
verbose : int, optional (default=0)
The verbosity level of the joblib.Parallel.

Returns
-------
pval_corr : ndarray, shape (n_features,)
p-value corrected for multiple testing, with numerically accurate
values for positive effects (ie., for p-value close to zero).
weights : ndarray, shape (n_features,)
The weights of the original model.

one_minus_pval_corr : ndarray, shape (n_features,)
One minus the corrected p-value, with numerically accurate
values for negative effects (ie., for p-value close to one).
"""
weights_distribution : ndarray, shape (n_permutations, n_features)
The distribution of the weights of the model obtained by shuffling
the target n_permutations times.

if C is None:
References
----------
.. footbibliography::

steps = [("SVR", LinearSVR())]
pipeline = Pipeline(steps)
parameters = {"SVR__C": Cs}
grid = GridSearchCV(pipeline, param_grid=parameters, n_jobs=n_jobs)
grid.fit(X, y)
C = grid.best_params_["SVR__C"]
estimator = LinearSVR(C=C)
"""

else:
rng = np.random.default_rng(seed)

estimator = LinearSVR(C=C)
# Get the weights of the original model
if not hasattr(estimator, "coef_"):
weights = _fit_and_weights(estimator, X, y)
else:
weights = estimator.coef_

pval_corr, one_minus_pval_corr = permutation_test(
X,
y,
estimator,
n_permutations=n_permutations,
seed=seed,
n_jobs=n_jobs,
verbose=verbose,
# Get the distribution of the weights by shuffling the target
weights_distribution = Parallel(n_jobs=n_jobs, verbose=verbose)(
delayed(_fit_and_weights)(clone(estimator), X, _shuffle(y, rng))
for _ in range(n_permutations)
)

return pval_corr, one_minus_pval_corr
# Convert the list of weights into an array
weights_distribution = np.array(weights_distribution)

return weights, weights_distribution

def permutation_test(X, y, estimator, n_permutations=1000, seed=0, n_jobs=1, verbose=1):
"""Permutation test shuffling the target

def permutation_test_pval(weights, weights_distribution):
"""
Compute p-value from permutation test

Parameters
----------
X : ndarray, shape (n_samples, n_features)
Data.

y : ndarray, shape (n_samples,)
Target.

n_permutations : int, optional (default=1000)
Number of permutations used to compute the survival function
and cumulative distribution function scores.

seed : int, optional (default=0)
Determines the permutations used for shuffling the target
weights : ndarray, shape (n_features,)
The weights of the original model.

n_jobs : int or None, optional (default=1)
Number of CPUs to use during the cross validation.

verbose: int, optional (default=1)
The verbosity level: if non zero, progress messages are printed
when computing the permutation stats in parralel.
The frequency of the messages increases with the verbosity level.
weights_distribution : ndarray, shape (n_permutations, n_features)
The distribution of the weights of the model obtained by shuffling

Returns
-------
@@ -129,20 +129,9 @@ def permutation_test(X, y, estimator, n_permutations=1000, seed=0, n_jobs=1, ver
One minus the corrected p-value, with numerically accurate
values for negative effects (ie., for p-value close to one).
"""
two_sided_pval_corr = step_down_max_t(weights, weights_distribution)

rng = np.random.default_rng(seed)

stat = _permutation_test_stat(clone(estimator), X, y)

permutation_stats = Parallel(n_jobs=n_jobs, verbose=verbose)(
delayed(_permutation_test_stat)(clone(estimator), X, _shuffle(y, rng))
for _ in range(n_permutations)
)

permutation_stats = np.array(permutation_stats)
two_sided_pval_corr = step_down_max_T(stat, permutation_stats)

stat_sign = np.sign(stat)
stat_sign = np.sign(weights)

pval_corr, _, one_minus_pval_corr, _ = pval_from_two_sided_pval_and_sign(
two_sided_pval_corr, stat_sign
@@ -151,65 +151,47 @@ def permutation_test(X, y, estimator, n_permutations=1000, seed=0, n_jobs=1, ver
return pval_corr, one_minus_pval_corr


def _permutation_test_stat(estimator, X, y):
"""Fit estimator and get coef"""
stat = estimator.fit(X, y).coef_
return stat


def _shuffle(y, rng):
"""Shuffle vector"""
indices = rng.permutation(len(y))
return _safe_indexing(y, indices)


def step_down_max_T(stat, permutation_stats):
"""Step-down maxT algorithm for computing adjusted p-values
def _fit_and_weights(estimator, X, y):
"""
Fit the estimator and return the weights

Parameters
----------
stat : ndarray, shape (n_features,)
Statistic computed on the original (unpermutted) problem.
estimator : object
The estimator to fit.

permutation_stats : ndarray, shape (n_permutations, n_features)
Statistics computed on permutted problems.
X : ndarray, shape (n_samples, n_features)
Data.

y : ndarray, shape (n_samples,)
Target.

Returns
-------
two_sided_pval_corr : ndarray, shape (n_features,)
Two-sided p-values corrected for multiple testing.

References
----------
.. [1] Westfall, P. H., & Young, S. S. (1993). Resampling-based multiple
testing: Examples and methods for p-value adjustment (Vol. 279).
John Wiley & Sons.
weights : ndarray, shape (n_features,)
The weights of the estimator.
"""
weights = estimator.fit(X, y).coef_
return weights

n_permutations, n_features = np.shape(permutation_stats)

index_ordered = np.argsort(np.abs(stat))
stat_ranked = np.empty(n_features)
stat_ranked[index_ordered] = np.arange(n_features)
stat_ranked = stat_ranked.astype(int)
stat_sorted = np.copy(np.abs(stat)[index_ordered])
permutation_stats_ordered = np.copy(np.abs(permutation_stats)[:, index_ordered])

for i in range(1, n_features):
permutation_stats_ordered[:, i] = np.maximum(
permutation_stats_ordered[:, i - 1], permutation_stats_ordered[:, i]
)

two_sided_pval_corr = (
np.sum(np.less_equal(stat_sorted, permutation_stats_ordered), axis=0)
/ n_permutations
)
def _shuffle(y, rng):
"""
Shuffle the target

for i in range(n_features - 1)[::-1]:
two_sided_pval_corr[i] = np.maximum(
two_sided_pval_corr[i], two_sided_pval_corr[i + 1]
)
Parameters
----------
y : ndarray, shape (n_samples,)
Target.

two_sided_pval_corr = np.copy(two_sided_pval_corr[stat_ranked])
rng : numpy.random.Generator
Random number generator.

return two_sided_pval_corr
Returns
-------
y_shuffled : ndarray, shape (n_samples,)
Shuffled target.
"""
y_copy = np.copy(y)
rng.shuffle(y_copy)
return y_copy
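The two-step flow this PR introduces (build a null distribution of weights by shuffling the target, then derive corrected p-values from it) can be sketched with a minimal self-contained re-implementation. The function names mirror the PR, but this is not the actual hidimstat code: in particular, a plain single-step maximum-statistic correction stands in for the step_down_max_t helper imported from hidimstat.stat_tools.

```python
import numpy as np
from sklearn.base import clone
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge


def permutation_test(X, y, estimator, n_permutations=100, seed=0):
    # Fit once on the true target to get the reference weights...
    rng = np.random.default_rng(seed)
    weights = clone(estimator).fit(X, y).coef_
    # ...then refit on shuffled targets to build the null distribution.
    weights_distribution = np.array(
        [clone(estimator).fit(X, rng.permutation(y)).coef_
         for _ in range(n_permutations)]
    )
    return weights, weights_distribution


def permutation_test_pval(weights, weights_distribution):
    # Single-step maxT correction: compare each |weight| with the maximum
    # |weight| across features in each permutation (simpler than the
    # step-down variant used in the real code).
    max_null = np.abs(weights_distribution).max(axis=1)
    pval_corr = (np.abs(weights)[None, :] <= max_null[:, None]).mean(axis=0)
    return pval_corr


X, y = make_regression(n_samples=80, n_features=10, noise=5.0, random_state=0)
weights, weights_distribution = permutation_test(X, y, Ridge(), n_permutations=100)
pval_corr = permutation_test_pval(weights, weights_distribution)
```

Splitting the API this way lets the expensive permutation loop run once while different correction rules can be applied to the stored distribution afterwards.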