From 05aff137673f85cb2c376c819d97b69f0a487b28 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Tue, 21 Jan 2025 18:40:47 +0100 Subject: [PATCH] Rename files --- src/hidimstat/permutation_importance.py | 355 +++++++++--------- .../permutation_importance_function.py | 203 ---------- .../permutation_importance_scikitlearn.py | 309 --------------- test/test_permutation_importance.py | 90 +++-- test/test_permutation_importance_function.py | 120 ------ ...test_permutation_importance_scikitlearn.py | 75 ---- 6 files changed, 243 insertions(+), 909 deletions(-) delete mode 100644 src/hidimstat/permutation_importance_function.py delete mode 100644 src/hidimstat/permutation_importance_scikitlearn.py delete mode 100644 test/test_permutation_importance_function.py delete mode 100644 test/test_permutation_importance_scikitlearn.py diff --git a/src/hidimstat/permutation_importance.py b/src/hidimstat/permutation_importance.py index ad6c9c1c..b6a76cef 100644 --- a/src/hidimstat/permutation_importance.py +++ b/src/hidimstat/permutation_importance.py @@ -1,192 +1,203 @@ import numpy as np -import pandas as pd from joblib import Parallel, delayed -from sklearn.base import BaseEstimator, check_is_fitted from sklearn.metrics import root_mean_squared_error +from sklearn.exceptions import NotFittedError +from sklearn.utils import check_random_state +from sklearn.base import clone from hidimstat.utils import _check_vim_predict_method -class PermutationImportance(BaseEstimator): +def permutation_importance( + X, + y, + estimator, + n_permutations: int = 50, + loss: callable = root_mean_squared_error, + method: str = "predict", + random_state: int = None, + n_jobs: int = None, + groups=None, +): """ + # Permutation importance + + Calculate permutation importance scores for features or feature groups in a machine learning model. + Permutation importance is a model inspection technique that measures the increase in the model's + prediction error after permuting a feature's values. A feature is considered "important" if shuffling + its values increases the model error, because the model relied on the feature for the prediction. + The implementation follows the methodology described in chapter 10 :cite:breimanRandomForests2001. + One implementation: https://github.com/SkadiEye/deepTL/blob/master/R/4-2-permfit.R + Parameters ---------- - estimator: scikit-learn compatible estimator - The predictive model. - n_permutations: int, default=50 - Number of permutations to perform. - loss: callable, default=root_mean_squared_error - Loss function to evaluate the model performance. - method: str, default='predict' - Method to use for predicting values that will be used to compute - the loss and the importance scores. The method must be implemented by the - estimator. Supported methods are 'predict', 'predict_proba', - 'decision_function' and 'transform'. - random_state: int, default=None - Random seed for the permutation. - n_jobs: int, default=1 - Number of jobs to run in parallel. + X : np.ndarray of shape (n_samples, n_features) + Training data. Can be numpy array or pandas DataFrame. + y : np.ndarray of shape (n_samples,) + Target values for the model. + estimator : object + A fitted estimator object implementing scikit-learn estimator interface. + The estimator must have a fitting method and one of the following prediction methods: + 'predict', 'predict_proba', 'decision_function', or 'transform'. + n_permutations : int, default=50 + Number of times to permute each feature or feature group. 
+ Higher values give more stable results but take longer to compute. + loss : callable, default=root_mean_squared_error + Function to measure the prediction error. Must take two arguments (y_true, y_pred) + and return a scalar value. Higher return values must indicate worse predictions. + method : str, default='predict' + The estimator method used for prediction. Must be one of: + - 'predict': Use estimator.predict() + - 'predict_proba': Use estimator.predict_proba() + - 'decision_function': Use estimator.decision_function() + - 'transform': Use estimator.transform() + random_state : int, default=None + Controls the randomness of the feature permutations. + Pass an int for reproducible results across multiple function calls. + n_jobs : int, default=None + Number of jobs to run in parallel. None means 1 unless in a joblib.parallel_backend context. + -1 means using all processors. + groups : dict, default=None + Dictionary specifying feature groups. Keys are group names and values are lists of feature + indices or feature names (if X is a pandas DataFrame). If None, each feature is treated + as its own group. + + Returns + ------- + importance : np.ndarray of shape (n_features,) or (n_groups,) + The importance scores for each feature or feature group. + Higher values indicate more important features. + list_loss_j : np.ndarray + Array containing all computed loss values for each permutation of each feature/group. + loss_reference : float + The reference loss (baseline) computed on the original, non-permuted data. + + Notes + ----- + The implementation supports both individual feature importance and group feature importance. + For group importance, features within the same group are permuted together. References ---------- .. footbibliography:: """ - def __init__( - self, - estimator, - n_permutations: int = 50, - loss: callable = root_mean_squared_error, - method: str = "predict", - random_state: int = None, - n_jobs: int = 1, - ): - - check_is_fitted(estimator) - self.estimator = estimator - self.n_permutations = n_permutations - - self.random_state = random_state - self.loss = loss - _check_vim_predict_method(method) - self.method = method - self.n_jobs = n_jobs - self.rng = np.random.RandomState(random_state) - self.n_groups = None - - def fit(self, X, y=None, groups=None): - """ - Parameters - ---------- - X: np.ndarray of shape (n_samples, n_features) - The input samples. Not used here. - y: np.ndarray of shape (n_samples,) - The target values. Not used here. - groups: dict, default=None - Dictionary of groups for the covariates. The keys are the group names - and the values are lists of covariate indices. - """ - self.groups = groups - return self - - def predict(self, X, y=None): - """ - Compute the prediction of the model with permuted data for each group. - - Parameters - ---------- - X: array-like of shape (n_samples, n_features) - The input samples. - y: array-like of shape (n_samples,) - The target values. 
- - Returns - ------- - premuted_y_pred: np.ndarray of shape (n_groups, n_permutations, n_samples) - The predictions of the model with permuted data for each group - - """ - check_is_fitted(self.estimator) - if self.groups is None: - self.n_groups = X.shape[1] - self.groups = {j: [j] for j in range(self.n_groups)} + # check parameters + _check_vim_predict_method(method) + + # define a random generator + check_random_state(random_state) + rng = np.random.RandomState(random_state) + + # management of the group + if groups is None: + n_groups = X.shape[1] + groups_ = {j: [j] for j in range(n_groups)} + else: + n_groups = len(groups) + if type(list(groups.values())[0][0]) is str: + groups_ = {} + for key, indexe_names in zip(groups.keys(), groups.values()): + groups_[key] = [] + for index_name in indexe_names: + index = np.where(index_name == X.columns)[0] + assert len(index) == 1 + groups_[key].append(index) else: - self.n_groups = len(self.groups) - - def _joblib_predict_one_group(X, j): - """ - Compute the importance score for a single group of covariates. - """ - if isinstance(X, pd.DataFrame): - X_j = X[self.groups[j]].copy().values - X_minus_j = X.drop(columns=self.groups[j]).values - group_ids = [ - i for i, col in enumerate(X.columns) if col in self.groups[j] - ] - non_group_ids = [ - i for i, col in enumerate(X.columns) if col not in self.groups[j] - ] - else: - X_j = X[:, self.groups[j]].copy() - X_minus_j = np.delete(X, self.groups[j], axis=1) - group_ids = self.groups[j] - non_group_ids = np.delete(np.arange(X.shape[1]), group_ids) - - # Create an array X_perm_j of shape (n_permutations, n_samples, n_features) - # where the j-th group of covariates is permuted - X_perm_j = np.empty((self.n_permutations, X.shape[0], X.shape[1])) - X_perm_j[:, :, non_group_ids] = X_minus_j - # Create the permuted data for the j-th group of covariates - group_j_permuted = np.array( - [self.rng.permutation(X_j) for _ in range(self.n_permutations)] - ) - X_perm_j[:, :, group_ids] = group_j_permuted - # Reshape X_perm_j to allow for batch prediction - X_perm_batch = X_perm_j.reshape(-1, X.shape[1]) - if isinstance(X, pd.DataFrame): - X_perm_batch = pd.DataFrame( - X_perm_batch.reshape(-1, X.shape[1]), columns=X.columns - ) - y_pred_perm = getattr(self.estimator, self.method)(X_perm_batch) - - # In case of classification, the output is a 2D array. 
Reshape accordingly - if y_pred_perm.ndim == 1: - y_pred_perm = y_pred_perm.reshape(self.n_permutations, X.shape[0]) - else: - y_pred_perm = y_pred_perm.reshape( - self.n_permutations, X.shape[0], y_pred_perm.shape[1] - ) - return y_pred_perm - - # Parallelize the computation of the importance scores for each group - out_list = Parallel(n_jobs=self.n_jobs)( - delayed(_joblib_predict_one_group)(X, j) for j in self.groups.keys() + groups_ = groups + + X_ = np.asarray(X) # avoid the management of panda dataframe + + # compute the reference residual + try: + y_pred = getattr(estimator, method)(X) + estimator_ = estimator + except NotFittedError: + estimator_ = clone(estimator) + # case for not fitted esimator + estimator_.fit(X_, y) + y_pred = getattr(estimator_, method)(X) + loss_reference = loss(y, y_pred) + + # Parallelize the computation of the residual for each permutation + # of each group + list_loss_j = Parallel(n_jobs=n_jobs)( + delayed(_predict_one_group)( + estimator_, + groups_[j], + X_, + y, + loss, + n_permutations, + rng, + method, ) + for j in groups_.keys() + ) + list_loss_j = np.array(list_loss_j) - premuted_y_pred = np.stack(out_list, axis=0) - return premuted_y_pred - - def score(self, X, y): - """ - Compute the importance scores for each group of covariates. - - Parameters - ---------- - X: array-like of shape (n_samples, n_features) - The input samples. - y: array-like of shape (n_samples,) - The target values. - - Returns - ------- - out_dict: dict - A dictionary containing the following keys: - - 'loss_reference': the loss of the model with the original data. - - 'loss_perm': a dictionary containing the loss of the model with - the permuted data for each group. - - 'importance': the importance scores for each group. - """ - check_is_fitted(self.estimator) - - output_dict = dict() - y_pred = getattr(self.estimator, self.method)(X) - loss_reference = self.loss(y, y_pred) - output_dict["loss_reference"] = loss_reference - - y_pred_perm = self.predict(X, y) - - output_dict["loss_perm"] = dict() - for j, y_pred_j in enumerate(y_pred_perm): - list_loss_perm = [] - for y_pred_perm in y_pred_j: - list_loss_perm.append(self.loss(y, y_pred_perm)) - output_dict["loss_perm"][j] = np.array(list_loss_perm) - - output_dict["importance"] = np.array( - [ - np.mean(output_dict["loss_perm"][j]) - output_dict["loss_reference"] - for j in range(self.n_groups) - ] - ) + # compute the importance + # equation 5 of mi2021permutation + importance = np.mean(list_loss_j - loss_reference, axis=1) - return output_dict + return importance, list_loss_j, loss_reference + + +def _predict_one_group(estimator, group_ids, X, y, loss, n_permutations, rng, method): + """ + Compute prediction loss scores after permuting a single group of features. + + Parameters + ---------- + estimator : object + Fitted estimator implementing scikit-learn API + group_ids : list + Indices of features in the group to permute + X : np.ndarray + Input data matrix + y : np.ndarray + Target values + loss : callable + Loss function to evaluate predictions + n_permutations : int + Number of permutations to perform + rng : RandomState + Random number generator instance + method : str + Prediction method to use ('predict', 'predict_proba', etc.) 
+ + Returns + ------- + list + Loss values for each permutation + """ + # get ids + non_group_ids = np.delete(np.arange(X.shape[1]), group_ids) + + # get data + X_j = X[:, group_ids].copy() + X_minus_j = np.delete(X, group_ids, axis=1) + + # Create an array X_perm_j of shape (n_permutations, n_samples, n_features) + # where the j-th group of covariates is permuted + X_perm_j = np.empty((n_permutations, X.shape[0], X.shape[1])) + X_perm_j[:, :, non_group_ids] = X_minus_j + + # Create the permuted data for the j-th group of covariates + group_j_permuted = np.array([rng.permutation(X_j) for _ in range(n_permutations)]) + X_perm_j[:, :, group_ids] = group_j_permuted + + # Reshape X_perm_j to allow for remove the indexation by groups + X_perm_batch = X_perm_j.reshape(-1, X.shape[1]) + y_pred_perm = getattr(estimator, method)(X_perm_batch) + + if y_pred_perm.ndim == 1: + # one value per y: regression + y_pred_perm = y_pred_perm.reshape(n_permutations, X.shape[0]) + else: + # probability per y: classification + y_pred_perm = y_pred_perm.reshape( + n_permutations, X.shape[0], y_pred_perm.shape[1] + ) + loss_i = [loss(y, y_pred_perm[i]) for i in range(n_permutations)] + return loss_i diff --git a/src/hidimstat/permutation_importance_function.py b/src/hidimstat/permutation_importance_function.py deleted file mode 100644 index b6a76cef..00000000 --- a/src/hidimstat/permutation_importance_function.py +++ /dev/null @@ -1,203 +0,0 @@ -import numpy as np -from joblib import Parallel, delayed -from sklearn.metrics import root_mean_squared_error -from sklearn.exceptions import NotFittedError -from sklearn.utils import check_random_state -from sklearn.base import clone - -from hidimstat.utils import _check_vim_predict_method - - -def permutation_importance( - X, - y, - estimator, - n_permutations: int = 50, - loss: callable = root_mean_squared_error, - method: str = "predict", - random_state: int = None, - n_jobs: int = None, - groups=None, -): - """ - # Permutation importance - - Calculate permutation importance scores for features or feature groups in a machine learning model. - Permutation importance is a model inspection technique that measures the increase in the model's - prediction error after permuting a feature's values. A feature is considered "important" if shuffling - its values increases the model error, because the model relied on the feature for the prediction. - The implementation follows the methodology described in chapter 10 :cite:breimanRandomForests2001. - One implementation: https://github.com/SkadiEye/deepTL/blob/master/R/4-2-permfit.R - - Parameters - ---------- - X : np.ndarray of shape (n_samples, n_features) - Training data. Can be numpy array or pandas DataFrame. - y : np.ndarray of shape (n_samples,) - Target values for the model. - estimator : object - A fitted estimator object implementing scikit-learn estimator interface. - The estimator must have a fitting method and one of the following prediction methods: - 'predict', 'predict_proba', 'decision_function', or 'transform'. - n_permutations : int, default=50 - Number of times to permute each feature or feature group. - Higher values give more stable results but take longer to compute. - loss : callable, default=root_mean_squared_error - Function to measure the prediction error. Must take two arguments (y_true, y_pred) - and return a scalar value. Higher return values must indicate worse predictions. - method : str, default='predict' - The estimator method used for prediction. 
Must be one of: - - 'predict': Use estimator.predict() - - 'predict_proba': Use estimator.predict_proba() - - 'decision_function': Use estimator.decision_function() - - 'transform': Use estimator.transform() - random_state : int, default=None - Controls the randomness of the feature permutations. - Pass an int for reproducible results across multiple function calls. - n_jobs : int, default=None - Number of jobs to run in parallel. None means 1 unless in a joblib.parallel_backend context. - -1 means using all processors. - groups : dict, default=None - Dictionary specifying feature groups. Keys are group names and values are lists of feature - indices or feature names (if X is a pandas DataFrame). If None, each feature is treated - as its own group. - - Returns - ------- - importance : np.ndarray of shape (n_features,) or (n_groups,) - The importance scores for each feature or feature group. - Higher values indicate more important features. - list_loss_j : np.ndarray - Array containing all computed loss values for each permutation of each feature/group. - loss_reference : float - The reference loss (baseline) computed on the original, non-permuted data. - - Notes - ----- - The implementation supports both individual feature importance and group feature importance. - For group importance, features within the same group are permuted together. - - References - ---------- - .. footbibliography:: - """ - - # check parameters - _check_vim_predict_method(method) - - # define a random generator - check_random_state(random_state) - rng = np.random.RandomState(random_state) - - # management of the group - if groups is None: - n_groups = X.shape[1] - groups_ = {j: [j] for j in range(n_groups)} - else: - n_groups = len(groups) - if type(list(groups.values())[0][0]) is str: - groups_ = {} - for key, indexe_names in zip(groups.keys(), groups.values()): - groups_[key] = [] - for index_name in indexe_names: - index = np.where(index_name == X.columns)[0] - assert len(index) == 1 - groups_[key].append(index) - else: - groups_ = groups - - X_ = np.asarray(X) # avoid the management of panda dataframe - - # compute the reference residual - try: - y_pred = getattr(estimator, method)(X) - estimator_ = estimator - except NotFittedError: - estimator_ = clone(estimator) - # case for not fitted esimator - estimator_.fit(X_, y) - y_pred = getattr(estimator_, method)(X) - loss_reference = loss(y, y_pred) - - # Parallelize the computation of the residual for each permutation - # of each group - list_loss_j = Parallel(n_jobs=n_jobs)( - delayed(_predict_one_group)( - estimator_, - groups_[j], - X_, - y, - loss, - n_permutations, - rng, - method, - ) - for j in groups_.keys() - ) - list_loss_j = np.array(list_loss_j) - - # compute the importance - # equation 5 of mi2021permutation - importance = np.mean(list_loss_j - loss_reference, axis=1) - - return importance, list_loss_j, loss_reference - - -def _predict_one_group(estimator, group_ids, X, y, loss, n_permutations, rng, method): - """ - Compute prediction loss scores after permuting a single group of features. 
- - Parameters - ---------- - estimator : object - Fitted estimator implementing scikit-learn API - group_ids : list - Indices of features in the group to permute - X : np.ndarray - Input data matrix - y : np.ndarray - Target values - loss : callable - Loss function to evaluate predictions - n_permutations : int - Number of permutations to perform - rng : RandomState - Random number generator instance - method : str - Prediction method to use ('predict', 'predict_proba', etc.) - - Returns - ------- - list - Loss values for each permutation - """ - # get ids - non_group_ids = np.delete(np.arange(X.shape[1]), group_ids) - - # get data - X_j = X[:, group_ids].copy() - X_minus_j = np.delete(X, group_ids, axis=1) - - # Create an array X_perm_j of shape (n_permutations, n_samples, n_features) - # where the j-th group of covariates is permuted - X_perm_j = np.empty((n_permutations, X.shape[0], X.shape[1])) - X_perm_j[:, :, non_group_ids] = X_minus_j - - # Create the permuted data for the j-th group of covariates - group_j_permuted = np.array([rng.permutation(X_j) for _ in range(n_permutations)]) - X_perm_j[:, :, group_ids] = group_j_permuted - - # Reshape X_perm_j to allow for remove the indexation by groups - X_perm_batch = X_perm_j.reshape(-1, X.shape[1]) - y_pred_perm = getattr(estimator, method)(X_perm_batch) - - if y_pred_perm.ndim == 1: - # one value per y: regression - y_pred_perm = y_pred_perm.reshape(n_permutations, X.shape[0]) - else: - # probability per y: classification - y_pred_perm = y_pred_perm.reshape( - n_permutations, X.shape[0], y_pred_perm.shape[1] - ) - loss_i = [loss(y, y_pred_perm[i]) for i in range(n_permutations)] - return loss_i diff --git a/src/hidimstat/permutation_importance_scikitlearn.py b/src/hidimstat/permutation_importance_scikitlearn.py deleted file mode 100644 index 26c9f269..00000000 --- a/src/hidimstat/permutation_importance_scikitlearn.py +++ /dev/null @@ -1,309 +0,0 @@ -"""Permutation importance for estimators.""" - -import numbers - -import numpy as np - -from sklearn.ensemble._bagging import _generate_indices -from sklearn.metrics import check_scoring, get_scorer_names -from sklearn.model_selection._validation import _aggregate_score_dicts -from sklearn.utils import Bunch, _safe_indexing, check_array, check_random_state -from sklearn.utils._param_validation import ( - HasMethods, - Integral, - Interval, - RealNotInt, - StrOptions, - validate_params, -) -from sklearn.utils.parallel import Parallel, delayed - - -def _weights_scorer(scorer, estimator, X, y, sample_weight): - if sample_weight is not None: - return scorer(estimator, X, y, sample_weight=sample_weight) - return scorer(estimator, X, y) - - -def _calculate_permutation_scores( - estimator, - X, - y, - sample_weight, - col_idx, - random_state, - n_repeats, - scorer, - max_samples, -): - """Calculate score when `col_idx` is permuted.""" - random_state = check_random_state(random_state) - - # Work on a copy of X to ensure thread-safety in case of threading based - # parallelism. Furthermore, making a copy is also useful when the joblib - # backend is 'loky' (default) or the old 'multiprocessing': in those cases, - # if X is large it will be automatically be backed by a readonly memory map - # (memmap). X.copy() on the other hand is always guaranteed to return a - # writable data-structure whose columns can be shuffled inplace. 
- if max_samples < X.shape[0]: - row_indices = _generate_indices( - random_state=random_state, - bootstrap=False, - n_population=X.shape[0], - n_samples=max_samples, - ) - X_permuted = _safe_indexing(X, row_indices, axis=0) - y = _safe_indexing(y, row_indices, axis=0) - if sample_weight is not None: - sample_weight = _safe_indexing(sample_weight, row_indices, axis=0) - else: - X_permuted = X.copy() - - scores = [] - shuffling_idx = np.arange(X_permuted.shape[0]) - for _ in range(n_repeats): - random_state.shuffle(shuffling_idx) - if hasattr(X_permuted, "iloc"): - col = X_permuted.iloc[shuffling_idx, col_idx] - col.index = X_permuted.index - X_permuted[X_permuted.columns[col_idx]] = col - else: - X_permuted[:, col_idx] = X_permuted[shuffling_idx, col_idx] - scores.append(_weights_scorer(scorer, estimator, X_permuted, y, sample_weight)) - - if isinstance(scores[0], dict): - scores = _aggregate_score_dicts(scores) - else: - scores = np.array(scores) - - return scores - - -def _create_importances_bunch(baseline_score, permuted_score): - """Compute the importances as the decrease in score. - - Parameters - ---------- - baseline_score : ndarray of shape (n_features,) - The baseline score without permutation. - permuted_score : ndarray of shape (n_features, n_repeats) - The permuted scores for the `n` repetitions. - - Returns - ------- - importances : :class:`~sklearn.utils.Bunch` - Dictionary-like object, with the following attributes. - importances_mean : ndarray, shape (n_features, ) - Mean of feature importance over `n_repeats`. - importances_std : ndarray, shape (n_features, ) - Standard deviation over `n_repeats`. - importances : ndarray, shape (n_features, n_repeats) - Raw permutation importance scores. - """ - importances = baseline_score - permuted_score - return Bunch( - importances_mean=np.mean(importances, axis=1), - importances_std=np.std(importances, axis=1), - importances=importances, - ) - - -@validate_params( - { - "estimator": [HasMethods(["fit"])], - "X": ["array-like"], - "y": ["array-like", None], - "scoring": [ - StrOptions(set(get_scorer_names())), - callable, - list, - tuple, - dict, - None, - ], - "n_repeats": [Interval(Integral, 1, None, closed="left")], - "n_jobs": [Integral, None], - "random_state": ["random_state"], - "sample_weight": ["array-like", None], - "max_samples": [ - Interval(Integral, 1, None, closed="left"), - Interval(RealNotInt, 0, 1, closed="right"), - ], - }, - prefer_skip_nested_validation=True, -) -def permutation_importance( - estimator, - X, - y, - *, - scoring=None, - n_repeats=5, - n_jobs=None, - random_state=None, - sample_weight=None, - max_samples=1.0, -): - """Permutation importance for feature evaluation [BRE]_. - - The :term:`estimator` is required to be a fitted estimator. `X` can be the - data set used to train the estimator or a hold-out set. The permutation - importance of a feature is calculated as follows. First, a baseline metric, - defined by :term:`scoring`, is evaluated on a (potentially different) - dataset defined by the `X`. Next, a feature column from the validation set - is permuted and the metric is evaluated again. The permutation importance - is defined to be the difference between the baseline metric and metric from - permutating the feature column. - - Read more in the :ref:`User Guide `. - - Parameters - ---------- - estimator : object - An estimator that has already been :term:`fitted` and is compatible - with :term:`scorer`. 
- - X : ndarray or DataFrame, shape (n_samples, n_features) - Data on which permutation importance will be computed. - - y : array-like or None, shape (n_samples, ) or (n_samples, n_classes) - Targets for supervised or `None` for unsupervised. - - scoring : str, callable, list, tuple, or dict, default=None - Scorer to use. - If `scoring` represents a single score, one can use: - - - a single string (see :ref:`scoring_parameter`); - - a callable (see :ref:`scoring`) that returns a single value. - - If `scoring` represents multiple scores, one can use: - - - a list or tuple of unique strings; - - a callable returning a dictionary where the keys are the metric - names and the values are the metric scores; - - a dictionary with metric names as keys and callables a values. - - Passing multiple scores to `scoring` is more efficient than calling - `permutation_importance` for each of the scores as it reuses - predictions to avoid redundant computation. - - If None, the estimator's default scorer is used. - - n_repeats : int, default=5 - Number of times to permute a feature. - - n_jobs : int or None, default=None - Number of jobs to run in parallel. The computation is done by computing - permutation score for each columns and parallelized over the columns. - `None` means 1 unless in a :obj:`joblib.parallel_backend` context. - `-1` means using all processors. See :term:`Glossary ` - for more details. - - random_state : int, RandomState instance, default=None - Pseudo-random number generator to control the permutations of each - feature. - Pass an int to get reproducible results across function calls. - See :term:`Glossary `. - - sample_weight : array-like of shape (n_samples,), default=None - Sample weights used in scoring. - - .. versionadded:: 0.24 - - max_samples : int or float, default=1.0 - The number of samples to draw from X to compute feature importance - in each repeat (without replacement). - - - If int, then draw `max_samples` samples. - - If float, then draw `max_samples * X.shape[0]` samples. - - If `max_samples` is equal to `1.0` or `X.shape[0]`, all samples - will be used. - - While using this option may provide less accurate importance estimates, - it keeps the method tractable when evaluating feature importance on - large datasets. In combination with `n_repeats`, this allows to control - the computational speed vs statistical accuracy trade-off of this method. - - .. versionadded:: 1.0 - - Returns - ------- - result : :class:`~sklearn.utils.Bunch` or dict of such instances - Dictionary-like object, with the following attributes. - - importances_mean : ndarray of shape (n_features, ) - Mean of feature importance over `n_repeats`. - importances_std : ndarray of shape (n_features, ) - Standard deviation over `n_repeats`. - importances : ndarray of shape (n_features, n_repeats) - Raw permutation importance scores. - - If there are multiple scoring metrics in the scoring parameter - `result` is a dict with scorer names as keys (e.g. 'roc_auc') and - `Bunch` objects like above as values. - - References - ---------- - .. [BRE] :doi:`L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32, - 2001. <10.1023/A:1010933404324>` - - Examples - -------- - >>> from sklearn.linear_model import LogisticRegression - >>> from sklearn.inspection import permutation_importance - >>> X = [[1, 9, 9],[1, 9, 9],[1, 9, 9], - ... [0, 9, 9],[0, 9, 9],[0, 9, 9]] - >>> y = [1, 1, 1, 0, 0, 0] - >>> clf = LogisticRegression().fit(X, y) - >>> result = permutation_importance(clf, X, y, n_repeats=10, - ... 
random_state=0)
-    >>> result.importances_mean
-    array([0.4666..., 0. , 0. ])
-    >>> result.importances_std
-    array([0.2211..., 0. , 0. ])
-    """
-    if not hasattr(X, "iloc"):
-        X = check_array(X, force_all_finite="allow-nan", dtype=None)
-
-    # Precompute random seed from the random state to be used
-    # to get a fresh independent RandomState instance for each
-    # parallel call to _calculate_permutation_scores, irrespective of
-    # the fact that variables are shared or not depending on the active
-    # joblib backend (sequential, thread-based or process-based).
-    random_state = check_random_state(random_state)
-    random_seed = random_state.randint(np.iinfo(np.int32).max + 1)
-
-    if not isinstance(max_samples, numbers.Integral):
-        max_samples = int(max_samples * X.shape[0])
-    elif max_samples > X.shape[0]:
-        raise ValueError("max_samples must be <= n_samples")
-
-    scorer = check_scoring(estimator, scoring=scoring)
-    baseline_score = _weights_scorer(scorer, estimator, X, y, sample_weight)
-
-    scores = Parallel(n_jobs=n_jobs)(
-        delayed(_calculate_permutation_scores)(
-            estimator,
-            X,
-            y,
-            sample_weight,
-            col_idx,
-            random_seed,
-            n_repeats,
-            scorer,
-            max_samples,
-        )
-        for col_idx in range(X.shape[1])
-    )
-
-    if isinstance(baseline_score, dict):
-        return {
-            name: _create_importances_bunch(
-                baseline_score[name],
-                # unpack the permuted scores
-                np.array([scores[col_idx][name] for col_idx in range(X.shape[1])]),
-            )
-            for name in baseline_score
-        }
-    else:
-        return _create_importances_bunch(baseline_score, np.array(scores))
diff --git a/test/test_permutation_importance.py b/test/test_permutation_importance.py
index 284d49d7..a0c76bae 100644
--- a/test/test_permutation_importance.py
+++ b/test/test_permutation_importance.py
@@ -4,10 +4,10 @@
 from sklearn.metrics import log_loss
 from sklearn.model_selection import train_test_split
 
-from hidimstat.permutation_importance import PermutationImportance
+from hidimstat.permutation_importance import permutation_importance
 
 
-def test_permutation_importance(linear_scenario):
+def test_permutation_importance_no_fitting(linear_scenario):
     X, y, beta = linear_scenario
     important_features = np.where(beta != 0)[0]
     non_important_features = np.where(beta == 0)[0]
@@ -15,9 +15,9 @@
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
 
     regression_model = LinearRegression()
-    regression_model.fit(X_train, y_train)
-
-    pi = PermutationImportance(
+    importance, list_loss_j, loss_reference = permutation_importance(
+        X_test,
+        y_test,
         estimator=regression_model,
         n_permutations=20,
         method="predict",
@@ -25,14 +25,6 @@
         n_jobs=1,
     )
 
-    pi.fit(
-        X_train,
-        y_train,
-        groups=None,
-    )
-    vim = pi.score(X_test, y_test)
-
-    importance = vim["importance"]
     assert importance.shape == (X.shape[1],)
     assert (
         importance[important_features].mean()
@@ -46,45 +38,83 @@
     }
     X_df = pd.DataFrame(X, columns=[f"col_{i}" for i in range(X.shape[1])])
     X_train_df, X_test_df, y_train, y_test = train_test_split(X_df, y, random_state=0)
-    regression_model.fit(X_train_df, y_train)
-    pi = PermutationImportance(
+    regression_model = LinearRegression()
+    importance, list_loss_j, loss_reference = permutation_importance(
+        X_test_df,
+        y_test,
         estimator=regression_model,
         n_permutations=20,
         method="predict",
         random_state=0,
         n_jobs=1,
+        groups=groups,
    )
-    pi.fit(
-        X_train_df,
-        y_train,
+
+    assert importance[0].mean() > 
importance[1].mean() + + # Same with groups + groups = { + "group_0": [i for i in important_features], + "the_group_1": [i for i in non_important_features], + } + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + regression_model = LinearRegression() + importance, list_loss_j, loss_reference = permutation_importance( + X_test, + y_test, + estimator=regression_model, + n_permutations=20, + method="predict", + random_state=0, + n_jobs=1, groups=groups, ) - vim = pi.score(X_test_df, y_test) - importance = vim["importance"] assert importance[0].mean() > importance[1].mean() # Classification case - y_clf = np.where(y > np.median(y), 1, 0) + y_clf = np.zeros_like(y) + for i, quantile in enumerate(np.arange(0.2, 0.8, 0.2)): + y_clf[np.where(y > np.quantile(y, quantile))] = i _, _, y_train_clf, y_test_clf = train_test_split(X, y_clf, random_state=0) logistic_model = LogisticRegression() - logistic_model.fit(X_train, y_train_clf) - pi_clf = PermutationImportance( + importance_clf, list_loss_j, loss_reference = permutation_importance( + X_test, + y_test_clf, estimator=logistic_model, n_permutations=20, method="predict_proba", random_state=0, n_jobs=1, loss=log_loss, - ) - - pi_clf.fit( - X_train, - y_train_clf, groups=None, ) - vim_clf = pi_clf.score(X_test, y_test_clf) - importance_clf = vim_clf["importance"] assert importance_clf.shape == (X.shape[1],) + + +def test_with_fitting(linear_scenario): + X, y, beta = linear_scenario + important_features = np.where(beta != 0)[0] + non_important_features = np.where(beta == 0)[0] + + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + + regression_model = LinearRegression() + regression_model.fit(X_train, y_train) + importance, list_loss_j, loss_reference = permutation_importance( + X_test, + y_test, + estimator=regression_model, + n_permutations=20, + method="predict", + random_state=0, + n_jobs=1, + ) + + assert importance.shape == (X.shape[1],) + assert ( + importance[important_features].mean() + > importance[non_important_features].mean() + ) diff --git a/test/test_permutation_importance_function.py b/test/test_permutation_importance_function.py deleted file mode 100644 index a0c76bae..00000000 --- a/test/test_permutation_importance_function.py +++ /dev/null @@ -1,120 +0,0 @@ -import numpy as np -import pandas as pd -from sklearn.linear_model import LinearRegression, LogisticRegression -from sklearn.metrics import log_loss -from sklearn.model_selection import train_test_split - -from hidimstat.permutation_importance_function import permutation_importance - - -def test_permutation_importance_no_fitting(linear_scenario): - X, y, beta = linear_scenario - important_features = np.where(beta != 0)[0] - non_important_features = np.where(beta == 0)[0] - - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) - - regression_model = LinearRegression() - importance, list_loss_j, loss_reference = permutation_importance( - X_test, - y_test, - estimator=regression_model, - n_permutations=20, - method="predict", - random_state=0, - n_jobs=1, - ) - - assert importance.shape == (X.shape[1],) - assert ( - importance[important_features].mean() - > importance[non_important_features].mean() - ) - - # Same with groups and a pd.DataFrame - groups = { - "group_0": [f"col_{i}" for i in important_features], - "the_group_1": [f"col_{i}" for i in non_important_features], - } - X_df = pd.DataFrame(X, columns=[f"col_{i}" for i in range(X.shape[1])]) - X_train_df, X_test_df, y_train, y_test = 
train_test_split(X_df, y, random_state=0) - regression_model = LinearRegression() - importance, list_loss_j, loss_reference = permutation_importance( - X_test_df, - y_test, - estimator=regression_model, - n_permutations=20, - method="predict", - random_state=0, - n_jobs=1, - groups=groups, - ) - - assert importance[0].mean() > importance[1].mean() - - # Same with groups - groups = { - "group_0": [i for i in important_features], - "the_group_1": [i for i in non_important_features], - } - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) - regression_model = LinearRegression() - importance, list_loss_j, loss_reference = permutation_importance( - X_test, - y_test, - estimator=regression_model, - n_permutations=20, - method="predict", - random_state=0, - n_jobs=1, - groups=groups, - ) - - assert importance[0].mean() > importance[1].mean() - - # Classification case - y_clf = np.zeros_like(y) - for i, quantile in enumerate(np.arange(0.2, 0.8, 0.2)): - y_clf[np.where(y > np.quantile(y, quantile))] = i - _, _, y_train_clf, y_test_clf = train_test_split(X, y_clf, random_state=0) - logistic_model = LogisticRegression() - - importance_clf, list_loss_j, loss_reference = permutation_importance( - X_test, - y_test_clf, - estimator=logistic_model, - n_permutations=20, - method="predict_proba", - random_state=0, - n_jobs=1, - loss=log_loss, - groups=None, - ) - - assert importance_clf.shape == (X.shape[1],) - - -def test_with_fitting(linear_scenario): - X, y, beta = linear_scenario - important_features = np.where(beta != 0)[0] - non_important_features = np.where(beta == 0)[0] - - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) - - regression_model = LinearRegression() - regression_model.fit(X_train, y_train) - importance, list_loss_j, loss_reference = permutation_importance( - X_test, - y_test, - estimator=regression_model, - n_permutations=20, - method="predict", - random_state=0, - n_jobs=1, - ) - - assert importance.shape == (X.shape[1],) - assert ( - importance[important_features].mean() - > importance[non_important_features].mean() - ) diff --git a/test/test_permutation_importance_scikitlearn.py b/test/test_permutation_importance_scikitlearn.py deleted file mode 100644 index 03a02c1d..00000000 --- a/test/test_permutation_importance_scikitlearn.py +++ /dev/null @@ -1,75 +0,0 @@ -import numpy as np -from sklearn.linear_model import LinearRegression, LogisticRegression -from sklearn.model_selection import train_test_split - -from hidimstat.permutation_importance_scikitlearn import permutation_importance - - -def test_permutation_importance(linear_scenario): - X, y, beta = linear_scenario - important_features = np.where(beta != 0)[0] - non_important_features = np.where(beta == 0)[0] - - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) - - regression_model = LinearRegression() - regression_model.fit(X_train, y_train) - vim = permutation_importance( - regression_model, - X_test, - y_test, - n_repeats=20, - scoring="r2", - random_state=0, - n_jobs=1, - ) - - importance = vim["importances_mean"] - - assert importance.shape == (X.shape[1],) - assert ( - importance[important_features].mean() - > importance[non_important_features].mean() - ) - - # impossible with groups - # # Same with groups and a pd.DataFrame - # groups = { - # "group_0": [f"col_{i}" for i in important_features], - # "the_group_1": [f"col_{i}" for i in non_important_features], - # } - # X_df = pd.DataFrame(X, columns=[f"col_{i}" for i in range(X.shape[1])]) - # 
X_train_df, X_test_df, y_train, y_test = train_test_split(X_df, y, random_state=0) - # regression_model = LinearRegression() - # regression_model.fit(X_train_df, y_train) - # vim = permutation_importance( - # regression_model, - # X_test_df, - # y_test, - # n_repeats=20, - # scoring='r2', - # random_state=0, - # n_jobs=1, - # groups=groups - # ) - # importance = vim['importances_mean'] - # - # assert importance[0].mean() > importance[1].mean() - - # Classification case - y_clf = np.where(y > np.median(y), 1, 0) - _, _, y_train_clf, y_test_clf = train_test_split(X, y_clf, random_state=0) - logistic_model = LogisticRegression() - logistic_model.fit(X_train, y_train_clf) - vim_clf = permutation_importance( - logistic_model, - X_test, - y_test_clf, - n_repeats=20, - scoring="neg_log_loss", - random_state=0, - n_jobs=1, - ) - importance_clf = vim_clf["importances_mean"] - - assert importance_clf.shape == (X.shape[1],)
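
Usage sketch (illustrative, not part of the patch): the snippet below exercises the function-based API that this patch moves into src/hidimstat/permutation_importance.py. The synthetic dataset, the LinearRegression model, and the group definitions are assumptions chosen for demonstration; only the permutation_importance signature and its three return values come from the code above.

    # Hypothetical example; dataset and groups are made up for illustration.
    from sklearn.datasets import make_regression
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import train_test_split

    from hidimstat.permutation_importance import permutation_importance

    # Toy regression problem (any tabular data with a scikit-learn estimator would do).
    X, y = make_regression(n_samples=200, n_features=6, n_informative=3, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    model = LinearRegression().fit(X_train, y_train)

    # Per-feature importance: with groups=None every feature is its own group.
    importance, losses, loss_reference = permutation_importance(
        X_test,
        y_test,
        estimator=model,
        n_permutations=20,
        method="predict",
        random_state=0,
        n_jobs=1,
    )
    print(importance.shape)  # (6,) - one score per feature

    # Grouped importance: features listed in the same group are permuted together.
    groups = {"first_half": [0, 1, 2], "second_half": [3, 4, 5]}
    group_importance, _, _ = permutation_importance(
        X_test, y_test, estimator=model, groups=groups, random_state=0
    )
    print(group_importance.shape)  # (2,) - one score per group

A positive score means that permuting the corresponding feature (or group) increased the loss relative to loss_reference, i.e. the model relied on it for prediction.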