Showing 11 changed files with 398 additions and 0 deletions.
@@ -0,0 +1,112 @@
from .._computation._bridge import glm

import numpy as np

class Lasso:
    """Implements Lasso regression with the regularization parameter fit so
    as to maximize performance on leave-one-out cross-validation.

    Parameters
    ----------
    fit_intercept : bool, default=True
        Whether a constant column should be added to the feature matrix.

    fit_beta_path : bool, default=False
        If lambda is specified, fit the matrix that determines the regressors,
        beta, for lambda in the range [specified_lambda, oo).

    lambda_ : float, default=None
        Regularization strength. If None, bbai will choose lambda so as to
        maximize performance on leave-one-out cross-validation.

        If X represents the design matrix and y the target vector, then the
        regressors, beta, will be determined so that

            X^T (y - X beta) = lambda gamma

        where

            gamma_j = sign(beta_j),  if beta_j != 0
            gamma_j in [-1, 1],      otherwise

        If fit_intercept is true, then the intercept will correspond to an
        implicit column of ones in X with no regularization.

    Examples
    --------
    >>> from sklearn.datasets import load_diabetes
    >>> from bbai.glm import Lasso
    >>> X, y = load_diabetes(return_X_y=True)
    >>> model = Lasso().fit(X, y)
    # Fit lasso regression using the hyperparameter that maximizes performance
    # on leave-one-out cross-validation.
    >>> print(model.lambda_)  # print out the hyperparameter that maximizes
    # leave-one-out cross-validation performance
    # prints: 22.179
    >>> print(model.intercept_, model.coef_)  # print out the coefficients that
    # maximize leave-one-out cross-validation performance
    # prints: 152.13 [0, -193.9, 521.8, 295.15, -99.28, 0, -222.67, 0, 511.95, 52.85]
    """
    def __init__(self, lambda_=None, fit_intercept=True, fit_beta_path=False):
        self.params_ = {}
        self.set_params(
            lambda_ = lambda_,
            fit_intercept = fit_intercept,
            fit_beta_path = fit_beta_path,
        )
        self.coef_ = None

    def get_params(self, deep=True):
        """Get parameters for this estimator."""
        return self.params_

    def set_params(self, **parameters):
        """Set parameters for this estimator."""
        for parameter, value in parameters.items():
            self.params_[parameter] = value
        return self  # scikit-learn's estimator API expects set_params to return self

    def fit(self, X, y):
        """Fit the model to the training data."""
        assert X.shape[0] == y.shape[0] and X.shape[1] > 0
        lambda_ = self.params_['lambda_']
        fit_intercept = self.params_['fit_intercept']
        fit_beta_path = self.params_['fit_beta_path']
        if lambda_ is None:
            res = glm.loo_lasso_fit(X, y, fit_intercept)
            self.cost_fn_ = res['cost_function']
            self.lambda_ = res['lambda_opt']
            self.loo_mse_ = res['cost_opt'] / len(y)
            beta = res['beta_opt']
        else:
            res = glm.lasso_fit(X, y, lambda_, fit_intercept, fit_beta_path)
            beta = res['beta']
            self.lambda_ = lambda_
            self.beta_path_ = res['beta_path']

        self.beta_ = beta
        if fit_intercept:
            self.intercept_ = beta[0]
            self.coef_ = beta[1:]
        else:
            self.coef_ = beta
            self.intercept_ = 0
        return self

    def predict(self, Xp):
        """Predict target values."""
        return self.intercept_ + np.dot(Xp, self.coef_)

    def evaluate_lo_cost(self, lda):
        """Evaluate the leave-one-out cost at an arbitrary value of lambda.

        The cost function is produced as an artifact of optimization when lambda=None.
        """
        # Note: this evaluation isn't very efficient. It's used mainly for testing.
        #
        # If you want more efficient evaluation, you should extract the segments of
        # polynomial and use them directly.
        N = self.cost_fn_.shape[1]
        if lda >= self.cost_fn_[0, N-1]:
            return self.cost_fn_[1, N-1]
        for j in range(N-1):
            if lda >= self.cost_fn_[0, j+1]:
                continue
            # quadratic coefficients for segment j of the piecewise cost function
            a0, a1, a2 = self.cost_fn_[1:, j]
            return a0 + a1 * lda + a2 * lda**2
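
The stationarity condition from the docstring is easy to check numerically after a fixed-lambda fit. A minimal sketch, assuming only the bbai.glm.Lasso API defined above (the data is invented for illustration):

# Sketch (not part of the commit): verify X^T (y - X beta) = lambda * gamma.
import numpy as np
from bbai.glm import Lasso

X = np.array([[1.0, 0.5], [0.3, -1.2], [-0.7, 0.9]])  # hypothetical data
y = np.array([0.2, -0.4, 1.1])
lda = 0.5

model = Lasso(lda, fit_intercept=False).fit(X, y)
h = np.dot(X.T, y - np.dot(X, model.coef_))
for hj, bj in zip(h, model.coef_):
    if bj != 0.0:
        # active coefficient: the correlation pins to lambda * sign(beta_j)
        assert np.isclose(hj, lda * np.sign(bj))
    else:
        # inactive coefficient: the correlation stays within [-lambda, lambda]
        assert np.abs(hj) <= lda + 1.0e-6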
@@ -0,0 +1,72 @@
import numpy as np

def evaluate_lo_cost_slow(X, y, m):
    """Given a model, evaluate leave-one-out cross-validation using a brute-force
    approach. Intended for testing."""
    n = len(y)
    res = 0.0
    for i in range(n):
        # refit with observation i held out, then score the held-out point
        ix = [ip for ip in range(n) if ip != i]
        Xm = X[ix, :]
        ym = y[ix]
        m.fit(Xm, ym)
        pred = m.predict(X[i])
        res += (y[i] - pred)**2
    return res

class LassoGridCv:
    """Construct a grid of leave-one-out cross-validation values. Used for testing."""
    def __init__(self, f, X, y, lambda_max, n=10):
        cvs = []
        self.lambdas = np.linspace(0, lambda_max, n)
        for lda in self.lambdas:
            cv = f(X, y, lda)
            cvs.append(cv)
        self.cvs = cvs
        self.cv_min = np.min(cvs)

class LassoKKT:
    """Verify Karush-Kuhn-Tucker conditions for a Lasso solution."""
    def __init__(self, X, y, lda, beta, with_intercept=False):
        if with_intercept:
            X = np.hstack((np.ones((len(y), 1)), X))
        y_tilde = y - np.dot(X, beta)
        self.with_intercept_ = with_intercept
        self.kkt_ = np.dot(X.T, y_tilde)
        self.beta_ = beta
        self.lambda_ = lda

    def __str__(self):
        return str(self.kkt_)

    def within(self, tol1, tol2=0.0):
        """Check the KKT conditions to within a relative tolerance, tol1, on the
        active conditions and an absolute slack, tol2, on the inactive ones."""
        beta = self.beta_
        kkt = self.kkt_

        # intercept: unregularized, so its correlation with the residual must vanish
        if self.with_intercept_:
            b0 = beta[0]
            cond = kkt[0]
            if np.abs(b0) != 0.0:
                cond /= b0
            if np.abs(cond) > tol1:
                return False
            beta = beta[1:]
            kkt = kkt[1:]

        # regressors
        for j, bj in enumerate(beta):
            cond = kkt[j]
            if bj == 0.0:
                # inactive: the subgradient condition |x_j^T residual| <= lambda
                if np.abs(cond) > self.lambda_ + tol2:
                    return False
                continue
            cond *= np.sign(bj)
            if self.lambda_ == 0.0:
                if np.abs(cond) > np.abs(bj) * tol1:
                    return False
            else:
                # active: x_j^T residual = lambda * sign(beta_j)
                if np.abs(cond - self.lambda_) > tol1 * self.lambda_:
                    return False
        return True
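
These helpers pair naturally with the analytic cost function produced by a lambda=None fit. A sketch of the cross-check, assuming evaluate_lo_cost_slow and LassoGridCv are importable alongside Lasso (the exact import path is an assumption):

# Sketch (not part of the commit): compare the piecewise-quadratic LOO cost
# against the brute-force grid.
import numpy as np
from bbai.glm import Lasso

np.random.seed(0)
X = np.random.normal(size=(8, 3))
y = np.random.normal(size=8)

def f(X, y, lda):
    return evaluate_lo_cost_slow(X, y, Lasso(lda, fit_intercept=False))

grid = LassoGridCv(f, X, y, lambda_max=np.max(np.abs(np.dot(X.T, y))), n=5)
model = Lasso(fit_intercept=False).fit(X, y)
for lda, cv in zip(grid.lambdas, grid.cvs):
    print(lda, cv, model.evaluate_lo_cost(lda))  # the two costs should agree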
@@ -0,0 +1,41 @@
import numpy as np

class RandomRegressionDataset:
    """Generate a random regression dataset with correlated features and a sparse,
    Laplace-distributed coefficient vector. Used for testing."""
    def __init__(self,
            n = 10,
            p = 5,
            cor = 0.5,
            err = .1,
            active_prob = 0.5,
            with_intercept = False,
    ):
        self.with_intercept = with_intercept

        # X: columns j1 and j2 have correlation cor ** |j1 - j2|
        K = np.zeros((p, p))
        for j1 in range(p):
            for j2 in range(p):
                K[j1, j2] = cor ** np.abs(j1 - j2)
        self.X = np.random.multivariate_normal(np.zeros(p), K, size=n)

        # beta: each coefficient is active with probability active_prob
        beta = np.zeros(p)
        for j in range(p):
            if not np.random.binomial(1, active_prob):
                continue
            beta[j] = np.random.laplace()
        self.beta = beta

        # y
        y = np.dot(self.X, self.beta)
        if with_intercept:
            y += np.random.laplace()
        y += np.random.normal(0, err, size=n)
        self.y = y

        # lambda_max: the smallest lambda at which every regressor is zero
        r = self.y
        if with_intercept:
            # center the target, since an unregularized intercept absorbs the mean
            r = self.y - np.mean(self.y)
        u = np.dot(self.X.T, r)
        self.lambda_max = np.max(np.abs(u))
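
By construction, lambda_max is the threshold at which the lasso zeroes every regressor, which the dataset makes easy to exercise. A sketch, assuming the bbai.glm exports used in the tests below:

# Sketch (not part of the commit): at lambda = lambda_max, every coefficient
# should vanish and the intercept should reduce to the mean of y.
import numpy as np
from bbai.glm import Lasso, RandomRegressionDataset

np.random.seed(7)
ds = RandomRegressionDataset(n=20, p=4, with_intercept=True)
model = Lasso(ds.lambda_max, fit_intercept=True).fit(ds.X, ds.y)
assert np.allclose(model.coef_, 0.0)
assert np.isclose(model.intercept_, np.mean(ds.y))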
@@ -0,0 +1,100 @@
import numpy as np
import pytest
from pytest import approx
from bbai.glm import Lasso, RandomRegressionDataset, LassoKKT

def test_1x1():
    X = np.array([[123]])
    y = np.array([-0.5])

    lambda0 = np.abs(X[0, 0] * y[0])

    # if lambda >= lambda0, the regressor is zero
    model = Lasso(lambda0, fit_intercept=False)
    model.fit(X, y)
    assert model.coef_ == np.array([0.0])

    # if lambda == 0, we get the least squares solution
    model = Lasso(0.0, fit_intercept=False)
    model.fit(X, y)
    assert model.coef_[0] == approx(y[0] / X[0, 0])

    # KKT conditions are satisfied
    lda = lambda0 / 2
    model = Lasso(lda, fit_intercept=False)
    model.fit(X, y)
    h = np.dot(X.T, y - np.dot(X, model.coef_))
    assert h[0] == approx(lda * np.sign(model.coef_[0]))

    # we can fit the beta path
    model = Lasso(0.0, fit_intercept=False, fit_beta_path=True)
    model.fit(X, y)
    assert model.beta_path_ == approx(np.array([[0.0, lambda0], [y[0] / X[0, 0], 0.0]]))

def test_2x1():
    X = np.array([[11.3], [-22.4]])
    y = np.array([-0.5, 1.321])

    lambda0 = np.abs(np.dot(X.T, y)[0])
    beta_ols = np.dot(np.linalg.inv(np.dot(X.T, X)), np.dot(X.T, y))[0]

    # if lambda >= lambda0, the regressor is zero
    model = Lasso(lambda0, fit_intercept=False)
    model.fit(X, y)
    assert model.coef_ == np.array([0.0])

    # if lambda == 0, we get the least squares solution
    model = Lasso(0.0, fit_intercept=False)
    model.fit(X, y)
    assert model.coef_[0] == approx(beta_ols)

    # KKT conditions are satisfied
    lda = lambda0 / 2
    model = Lasso(lda, fit_intercept=False)
    model.fit(X, y)
    h = np.dot(X.T, y - np.dot(X, model.coef_))
    assert h[0] == approx(lda * np.sign(model.coef_[0]))

    # we can fit the beta path
    model = Lasso(0.0, fit_intercept=False, fit_beta_path=True)
    model.fit(X, y)
    assert model.beta_path_ == approx(np.array([[0.0, lambda0], [beta_ols, 0.0]]))

def test_random():
    np.random.seed(3)

    # we can fit random data sets
    for _ in range(10):
        ds = RandomRegressionDataset(n=10, p=5)
        lda = np.random.uniform() * ds.lambda_max
        model = Lasso(lda, fit_intercept=False)
        model.fit(ds.X, ds.y)
        kkt = LassoKKT(ds.X, ds.y, lda, model.beta_, with_intercept=False)
        assert kkt.within(1.0e-6)
        xp = np.random.uniform(size=5)
        expected = np.dot(xp, model.coef_)
        assert (model.predict(xp) == expected).all()

    # we can fit random data sets with intercept
    for _ in range(10):
        ds = RandomRegressionDataset(n=10, p=5, with_intercept=True)
        lda = np.random.uniform() * ds.lambda_max
        model = Lasso(lda, fit_intercept=True)
        model.fit(ds.X, ds.y)
        kkt = LassoKKT(ds.X, ds.y, lda, model.beta_, with_intercept=True)
        assert kkt.within(1.0e-6)
        xp = np.random.uniform(size=5)
        expected = model.intercept_ + np.dot(xp, model.coef_)
        assert (model.predict(xp) == expected).all()

    # we can fit data sets where (num_regressors) > (num_data)
    for _ in range(10):
        ds = RandomRegressionDataset(n=5, p=6, with_intercept=True)
        lda = np.random.uniform() * ds.lambda_max
        model = Lasso(lda, fit_intercept=True)
        model.fit(ds.X, ds.y)
        kkt = LassoKKT(ds.X, ds.y, lda, model.beta_, with_intercept=True)
        assert kkt.within(1.0e-6)

if __name__ == "__main__":
    raise SystemExit(pytest.main([__file__]))
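
One more property worth exercising, shown here as a sketch rather than as part of the commit: when lambda is chosen by the optimizer, the returned cost function should attain its minimum at model.lambda_.

# Sketch: scan the piecewise-quadratic LOO cost around the reported optimum.
import numpy as np
from bbai.glm import Lasso, RandomRegressionDataset

np.random.seed(11)
ds = RandomRegressionDataset(n=15, p=4)
model = Lasso(fit_intercept=False).fit(ds.X, ds.y)

cost_opt = model.evaluate_lo_cost(model.lambda_)
assert np.isclose(cost_opt, model.loo_mse_ * len(ds.y))
for lda in np.linspace(0.0, 2.0 * model.lambda_ + 1.0, 25):
    assert model.evaluate_lo_cost(lda) >= cost_opt - 1.0e-8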