Skip to content

Commit

Permalink
Improve coverage and test
Browse files Browse the repository at this point in the history
  • Loading branch information
lionelkusch committed Jan 17, 2025
1 parent df45c82 commit e3eeb72
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 22 deletions.
5 changes: 1 addition & 4 deletions src/hidimstat/knockoffs.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,9 +316,6 @@ def model_x_knockoff_filter(test_score, fdr=0.1, offset=1, selection_only=True):
This function calculates the knockoff threshold based on the test statistics and the
desired FDR level. It then identifies the selected variables based on the threshold.
"""
if offset not in (0, 1):
raise ValueError("'offset' must be either 0 or 1")

# run the knockoff filter
threshold = _knockoff_threshold(test_score, fdr=fdr, offset=offset)
selected = np.where(test_score >= threshold)[0]
Expand Down Expand Up @@ -561,7 +558,7 @@ def _stat_coefficient_diff(X, X_tilde, y, estimator, preconfigure_estimator=None
):
coef = np.ravel(estimator.best_estimator_.coef_) # for CV object
else:
raise ValueError("estimator should be linear")
raise TypeError("estimator should be linear")
# Equation 1.7 in barber2015controlling or 3.6 of candes2018panning
test_score = np.abs(coef[:n_features]) - np.abs(coef[n_features:])
return test_score
Expand Down
74 changes: 56 additions & 18 deletions test/test_knockoff.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeRegressor


def test_knockoff_aggregation():
""" Test knockoff with aggregation"""
n = 500
p = 100
snr = 5
Expand All @@ -26,7 +28,7 @@ def test_knockoff_aggregation():
X, y, _, non_zero_index = simu_data(n, p, snr=snr, seed=0)

test_scores = model_x_knockoff_aggregation(
X, y, n_bootstraps=n_bootstraps, random_state=0
X, y, n_bootstraps=n_bootstraps, random_state=None
)
selected_verbose, aggregated_pval, pvals = model_x_knockoff_bootstrap_quantile(
test_scores, fdr=fdr, selection_only=False
Expand Down Expand Up @@ -99,6 +101,7 @@ def test_knockoff_aggregation():


def test_model_x_knockoff():
"""Test the selection of vatiabel from knockoff"""
seed = 42
fdr = 0.2
n = 300
Expand All @@ -109,17 +112,31 @@ def test_model_x_knockoff():
test_score,
fdr=fdr,
)
with pytest.raises(ValueError, match="'offset' must be either 0 or 1"):
model_x_knockoff_filter(test_score, offset=2)
with pytest.raises(ValueError, match="'offset' must be either 0 or 1"):
model_x_knockoff_filter(test_score, offset=-0.1)
ko_result_bis, threshold = model_x_knockoff_filter(
test_score,
fdr=fdr,
selection_only=False
)
assert np.all(ko_result == ko_result_bis)
fdp, power = cal_fdp_power(ko_result, non_zero)
assert fdp <= 0.2
assert power > 0.7

ko_result = model_x_knockoff_pvalue(test_score, fdr=fdr, selection_only=True)
ko_result_bis, pvals = model_x_knockoff_pvalue(test_score, fdr=fdr, selection_only=False)
assert np.all(ko_result == ko_result_bis)
fdp, power = cal_fdp_power(ko_result, non_zero)
assert fdp <= 0.2
assert power > 0.7
assert np.all( 0<=pvals) or np.all(pvals<=1)


def test_model_x_knockoff_estimator():
""" Test knockoff with a crossvalidation estimator"""
seed = 42
fdr = 0.2
n = 300
Expand All @@ -141,6 +158,27 @@ def test_model_x_knockoff_estimator():
assert fdp <= 0.2
assert power > 0.7

def test_model_x_knockoff_exception():
n = 50
p = 100
seed = 45
rgn = np.random.RandomState(seed)
X = rgn.randn(n, p)
y = rgn.randn(n)
with pytest.raises(TypeError, match="You should not use this function"):
model_x_knockoff(
X,
y,
estimator=Lasso(),
)
with pytest.raises(TypeError, match="estimator should be linear"):
model_x_knockoff(
X,
y,
estimator=DecisionTreeRegressor(),
preconfigure_estimator=None
)


def test_estimate_distribution():
"""
Expand Down Expand Up @@ -178,6 +216,7 @@ def test_estimate_distribution():


def test_gaussian_knockoff_equi():
"""test function of gaussian knockoff"""
seed = 42
n = 100
p = 50
Expand All @@ -195,21 +234,19 @@ def test_gaussian_knockoff_equi_warning():
seed = 42
n = 100
p = 50
tol = 1e-7
tol = 1e-14
rgn = np.random.RandomState(seed)
X = rgn.randn(n, p)
mu = X.mean(axis=0)
# create a positive definite matrix
sigma = rgn.randn(p, p)
while not np.all(np.linalg.eigvalsh(sigma) > tol):
sigma += 0.1 * np.eye(p)
sigma = sigma.T * sigma
sigma *= 1e-13
u, s, vh = np.linalg.svd(rgn.randn(p, p))
d = np.eye(p) * tol/10
sigma=u*d*u.T
with pytest.warns(
UserWarning,
match="The conditional covariance matrix for knockoffs is not positive",
):
X_tilde = gaussian_knockoff_generation(X, mu, sigma, seed=seed * 2, tol=1e-10)
X_tilde = gaussian_knockoff_generation(X, mu, sigma, seed=seed * 2, tol=tol)

assert X_tilde.shape == (n, p)

Expand All @@ -220,22 +257,23 @@ def test_s_equi_not_define_positive():
tol = 1e-7
seed = 42

# random matrix
# random positive matrix
rgn = np.random.RandomState(seed)
sigma = rgn.randn(n, n)
sigma -= np.min(sigma)
a = rgn.randn(n, n)
a -= np.min(a)
with pytest.raises(
Exception, match="The covariance matrix is not positive-definite."
):
_s_equi(sigma)
_s_equi(a)

# positive matrix
while not np.all(np.linalg.eigvalsh(sigma) > tol):
sigma += 0.1 * np.eye(n)
# matrix with positive eigenvalues, positive diagonal
while not np.all(np.linalg.eigvalsh(a) > tol):
a += 0.1 * np.eye(n)
with pytest.warns(UserWarning, match="The equi-correlated matrix"):
_s_equi(sigma)
_s_equi(a)

# positive definite matrix
sigma = sigma.T * sigma
sigma = (sigma + sigma.T) / 2
u, s, vh = np.linalg.svd(a)
d = np.eye(n)
sigma=u*d*u.T
_s_equi(sigma)

0 comments on commit e3eeb72

Please sign in to comment.