From 644feb822f353da0b43c524152bdc7fe95339bf0 Mon Sep 17 00:00:00 2001 From: Joshua Ivanhoe Date: Wed, 29 May 2024 09:47:49 +0100 Subject: [PATCH 01/10] Update min Python version to 3.10 and switch to using poetry --- .github/workflows/ci.yml | 30 ++++++++++++------------ pyproject.toml | 49 +++++++++++++++++++++++++++------------- src/sparsely/base.py | 16 ++++++------- src/sparsely/tune.py | 7 +++--- tests/conftest.py | 10 ++++---- tests/test_classifier.py | 6 ++--- tests/test_regressor.py | 3 +-- 7 files changed, 67 insertions(+), 54 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a16f9a2..2af348d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,27 +13,27 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} + + - name: Install poetry + run: python -m pip install poetry + - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install flake8 pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + run: poetry install + - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest - env: - PYTHONPATH: ${{ github.workspace }}/src - run: | - pytest + run: poetry run flake8 + + - name: Format with black + run: poetry run black + + - name: Run tests + run: poetry run pytest --cov diff --git a/pyproject.toml b/pyproject.toml index bfae609..46e711d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,27 +1,44 @@ -[build-system] -requires = ["setuptools >= 61.0"] -build-backend = "setuptools.build_meta" - -[project] +[tool.poetry] name = "sparsely" -version = "1.1.0" +version = "1.1.2" +description = "Scalable sparse linear models in Python" +license ="MIT" authors = [ - { name="Joshua Ivanhoe", email="joshua.k.ivanhoe@gmail.com" }, + "Joshua Ivanhoe <joshua.k.ivanhoe@gmail.com>" ] -description = "Scalable sparse linear models in Python" readme = "README.md" -license = {file = "LICENSE"} -requires-python = ">=3.9,<3.12" +repository = "https://github.com/joshivanhoe/sparsely" +documentation = "https://joshivanhoe.github.io/sparsely/" classifiers = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ] -dynamic = ["dependencies"] +packages = [ + { include = "sparsely", from = "src" } +] -[tool.setuptools.dynamic] -dependencies = {file = ["requirements.txt"]} +[tool.poetry.dependencies] +python = "^3.10" +halfspace-optimizer = ">=0.1.0" +scikit-learn = ">=1.3.2" +tqdm = ">=4.66.1" -[project.urls] -Homepage = "https://github.com/joshivanhoe/sparsely" -Issues = "https://github.com/joshivanhoe/sparsely/issues" +[tool.poetry.group.dev.dependencies] +black = "*" +flake8 = "*" +pre-commit = "*" +tomli = "*" + +[tool.poetry.group.test.dependencies] +pytest = "*" +pytest-cov = "*" + +[tool.poetry.group.docs] +optional = true +[tool.poetry.group.docs.dependencies] +sphinx = "*" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/src/sparsely/base.py 
b/src/sparsely/base.py index 9dc899f..717afea 100644 --- a/src/sparsely/base.py +++ b/src/sparsely/base.py @@ -9,7 +9,7 @@ import warnings from abc import ABC, abstractmethod from numbers import Real, Integral -from typing import Optional, Callable, ClassVar, Sequence +from typing import Callable, ClassVar, Sequence import numpy as np from halfspace import Model @@ -40,8 +40,8 @@ class BaseSparseEstimator(BaseEstimator, ABC): features will be selected. If `None`, then the initial guess is randomly selected. Providing a good initial guess based on problem-specific knowledge can significantly speed up the search. feature_groups: Set of features that are mutually exclusive. For example, if `feature_groups=[{0, 1}, {2, 3}]`, - then at most one features 0 and 1 will be selected, and at most one features 2 and 3 will be selected. This - can be used to encode prior knowledge about the problem. + then at most one of the features 0 and 1 will be selected, and at most one of the features 2 and 3 will be + selected. This can be used to encode prior knowledge about the problem. solver: The solver to use for the optimization problem. The available options are "CBC" and "GUROBI". Support for the "HiGHS" solver is also planned for a future release. random_state: Controls the random seed for the initial guess if a user-defined initial guess is not provided. @@ -63,15 +63,15 @@ class BaseSparseEstimator(BaseEstimator, ABC): def __init__( self, - k: Optional[int] = None, - gamma: Optional[float] = None, + k: int | None = None, + gamma: float | None = None, normalize: bool = True, max_iters: int = 500, tol: float = 1e-4, - start: Optional[set[int]] = None, - feature_groups: Optional[Sequence[set[int]]] = None, + start: set[int] | None = None, + feature_groups: Sequence[set[int]] | None = None, solver: str = "CBC", - random_state: Optional[int] = None, + random_state: int | None = None, verbose: bool = False, ): """Model constructor. 
diff --git a/src/sparsely/tune.py b/src/sparsely/tune.py index 5e0137f..47bc751 100644 --- a/src/sparsely/tune.py +++ b/src/sparsely/tune.py @@ -1,7 +1,6 @@ """This module implements a function to tune the sparsity parameter of a linear model using cross-validation.""" from copy import deepcopy -from typing import Optional, Union import numpy as np import pandas as pd @@ -12,7 +11,7 @@ from .classifier import SparseLinearClassifier from .regressor import SparseLinearRegressor -Estimator = Union[SparseLinearRegressor, SparseLinearClassifier] +Estimator = SparseLinearRegressor | SparseLinearClassifier def tune_estimator( @@ -22,11 +21,11 @@ def tune_estimator( k_min: int = 1, k_max: int = None, step_size: int = 1, - max_iters_no_improvement: Optional[int] = None, + max_iters_no_improvement: int | None = None, cv: int = 3, return_search_log: bool = False, show_progress_bar: bool = False, -) -> Union[Estimator, tuple[Estimator, pd.DataFrame]]: +) -> Estimator | tuple[Estimator, pd.DataFrame]: """Tune the sparsity parameter (i.e. number of non-zero coefficients) of a linear model. 
The sparsity parameter is tuned by performing a grid search over the range [k_min, k_max] with step size diff --git a/tests/conftest.py b/tests/conftest.py index 8a01c69..7ae922c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,11 +3,11 @@ from sklearn.datasets import make_regression, make_classification from sklearn.model_selection import train_test_split +Dataset = tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray | None] + @pytest.fixture -def regression_dataset() -> tuple[ - np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray -]: +def regression_dataset() -> Dataset: """Generate a regression dataset.""" X, y, coef = make_regression( n_samples=1000, @@ -21,7 +21,7 @@ def regression_dataset() -> tuple[ @pytest.fixture -def classification_dataset() -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: +def classification_dataset() -> Dataset: """Generate a classification dataset.""" X, y = make_classification( n_samples=1000, @@ -32,4 +32,4 @@ def classification_dataset() -> tuple[np.ndarray, np.ndarray, np.ndarray, np.nda random_state=0, ) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) - return X_train, X_test, y_train, y_test + return X_train, X_test, y_train, y_test, None diff --git a/tests/test_classifier.py b/tests/test_classifier.py index 2f7b2be..0224a84 100644 --- a/tests/test_classifier.py +++ b/tests/test_classifier.py @@ -1,11 +1,9 @@ -import numpy as np import pytest from sklearn.metrics import roc_auc_score, balanced_accuracy_score +from conftest import Dataset from sparsely import SparseLinearClassifier -Dataset = tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray] - # TODO: add tests for sklearn compatibility - requires support for multi-class problems @@ -24,7 +22,7 @@ def test_sparse_linear_regressor( classification_dataset: Dataset, estimator: SparseLinearClassifier ): - X_train, X_test, y_train, y_test = classification_dataset + X_train, X_test, y_train, y_test, _ = 
classification_dataset estimator.fit(X_train, y_train) predicted = estimator.predict(X_test) predicted_proba = estimator.predict_proba(X_test) diff --git a/tests/test_regressor.py b/tests/test_regressor.py index 556fa14..74bd0ee 100644 --- a/tests/test_regressor.py +++ b/tests/test_regressor.py @@ -2,10 +2,9 @@ import pytest from sklearn.utils.estimator_checks import check_estimator +from conftest import Dataset from sparsely import SparseLinearRegressor -Dataset = tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray] - def test_sklearn_compatibility(): check_estimator(SparseLinearRegressor()) From 3d3551f0e99d8ab6a1f37520274a03a946192e53 Mon Sep 17 00:00:00 2001 From: Joshua Ivanhoe Date: Wed, 29 May 2024 09:50:40 +0100 Subject: [PATCH 02/10] Update min Python version to 3.10 and switch to using poetry --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2af348d..3d08fe5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,11 +29,11 @@ jobs: - name: Install dependencies run: poetry install - - name: Lint with flake8 - run: poetry run flake8 - - name: Format with black run: poetry run black + - name: Lint with flake8 + run: poetry run flake8 + - name: Run tests run: poetry run pytest --cov From 28f18b1053333f9cff51917e306acee51b9d7b1d Mon Sep 17 00:00:00 2001 From: Joshua Ivanhoe Date: Wed, 29 May 2024 09:55:06 +0100 Subject: [PATCH 03/10] Update Python version in pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 46e711d..c66def1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ packages = [ ] [tool.poetry.dependencies] -python = "^3.10" +python = ">=3.10,<3.12" halfspace-optimizer = ">=0.1.0" scikit-learn = ">=1.3.2" tqdm = ">=4.66.1" From 21a13978d6886c9acefd897ce1c75d6f257dc890 Mon Sep 17 00:00:00 2001 From: Joshua 
Ivanhoe Date: Wed, 29 May 2024 09:56:33 +0100 Subject: [PATCH 04/10] Debug ci.yml (WIP) --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3d08fe5..1a8cfb8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,7 +30,7 @@ jobs: run: poetry install - name: Format with black - run: poetry run black + run: poetry run black ./src - name: Lint with flake8 run: poetry run flake8 From 53ebed070de1715350afc3a991acbea9b0b07ced Mon Sep 17 00:00:00 2001 From: Joshua Ivanhoe Date: Wed, 29 May 2024 10:00:00 +0100 Subject: [PATCH 05/10] Debug ci.yml (WIP) --- .github/workflows/ci.yml | 2 +- src/sparsely/__init__.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1a8cfb8..729af5a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,7 +33,7 @@ jobs: run: poetry run black ./src - name: Lint with flake8 - run: poetry run flake8 + run: poetry run flake8 --max-line-length 120 - name: Run tests run: poetry run pytest --cov diff --git a/src/sparsely/__init__.py b/src/sparsely/__init__.py index c20e5ab..a169602 100644 --- a/src/sparsely/__init__.py +++ b/src/sparsely/__init__.py @@ -2,3 +2,5 @@ from .classifier import SparseLinearClassifier from .regressor import SparseLinearRegressor from .tune import tune_estimator + +__all__ = ["SparseLinearClassifier", "SparseLinearRegressor", "tune_estimator"] From fb79036a6c58a76df4616c642c776860886dc9d0 Mon Sep 17 00:00:00 2001 From: Joshua Ivanhoe Date: Wed, 29 May 2024 10:01:40 +0100 Subject: [PATCH 06/10] Debug failing tests --- tests/test_classifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_classifier.py b/tests/test_classifier.py index 0224a84..839a9e4 100644 --- a/tests/test_classifier.py +++ b/tests/test_classifier.py @@ -50,6 +50,6 @@ def test_sparse_linear_regressor( def 
test_sparse_linear_regressor_invalid_params( classification_dataset: Dataset, estimator: SparseLinearClassifier ): - X_train, X_test, y_train, y_test = classification_dataset + X_train, X_test, y_train, y_test, _ = classification_dataset with pytest.raises((ValueError, TypeError)): estimator.fit(X_train, y_train) From ff8277e0126014ccab35bd140affc0f61db88505 Mon Sep 17 00:00:00 2001 From: Joshua Ivanhoe Date: Tue, 18 Jun 2024 11:41:00 +0100 Subject: [PATCH 07/10] Update GitHub Actions workflows (WIP) --- .github/workflows/ci.yml | 4 ++-- .github/workflows/release.yml | 23 +++++++++++------------ README.md | 3 +-- pyproject.toml | 1 + src/sparsely/tune.py | 2 +- 5 files changed, 16 insertions(+), 17 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 729af5a..b933b3a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,4 +1,4 @@ -name: CI +name: CI workflow on: push: @@ -36,4 +36,4 @@ jobs: run: poetry run flake8 --max-line-length 120 - name: Run tests - run: poetry run pytest --cov + run: poetry run pytest --cov=src diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1c0b6e0..33f040a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,4 +1,4 @@ -name: Upload Python Package +name: Publish Python package to PyPI on: release: @@ -14,18 +14,17 @@ jobs: steps: - uses: actions/checkout@v3 + - name: Set up Python uses: actions/setup-python@v3 with: - python-version: '3.11' + python-version: "3.11" + - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build - - name: Build package - run: python -m build - - name: Publish package - uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} + run: python -m pip install poetry + + - name: Configure poetry with PyPI token + run: poetry config pypi-token.pypi ${{ secrets.PYPI_API_TOKEN }} + 
+ - name: Build and publish package + run: poetry publish --build diff --git a/README.md b/README.md index bde052e..e4e3ae0 100644 --- a/README.md +++ b/README.md @@ -39,8 +39,7 @@ Clone the repository using `git`: git clone https://github.com/joshivanhoe/sparsely ```` -Create a fresh virtual environment using `venv` or `conda`. -Activate the environment and navigate to the cloned `halfspace` directory. +Create a fresh virtual environment, activate it and navigate to the cloned `sparsely` directory. Install a locally editable version of the package using `pip`: ```bash diff --git a/pyproject.toml b/pyproject.toml index c66def1..b605ca1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ pre-commit = "*" tomli = "*" [tool.poetry.group.test.dependencies] +mypy = "*" pytest = "*" pytest-cov = "*" diff --git a/src/sparsely/tune.py b/src/sparsely/tune.py index 47bc751..51f8a6b 100644 --- a/src/sparsely/tune.py +++ b/src/sparsely/tune.py @@ -19,7 +19,7 @@ def tune_estimator( y: np.ndarray, estimator: Estimator, k_min: int = 1, - k_max: int = None, + k_max: int | None = None, step_size: int = 1, max_iters_no_improvement: int | None = None, cv: int = 3, From 1cdf371fee815c38af0f6a013c449c4c5c5ffccf Mon Sep 17 00:00:00 2001 From: Joshua Ivanhoe Date: Wed, 19 Jun 2024 10:18:48 +0100 Subject: [PATCH 08/10] Update pyproject.toml --- pyproject.toml | 4 ++-- requirements.txt | 7 ------- 2 files changed, 2 insertions(+), 9 deletions(-) delete mode 100644 requirements.txt diff --git a/pyproject.toml b/pyproject.toml index b605ca1..0a5e114 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,19 +19,19 @@ packages = [ ] [tool.poetry.dependencies] -python = ">=3.10,<3.12" +python = ">=3.10,=<3.12" halfspace-optimizer = ">=0.1.0" scikit-learn = ">=1.3.2" tqdm = ">=4.66.1" [tool.poetry.group.dev.dependencies] black = "*" +mypy = "*" flake8 = "*" pre-commit = "*" tomli = "*" [tool.poetry.group.test.dependencies] -mypy = "*" pytest = "*" pytest-cov = "*" diff --git 
a/requirements.txt b/requirements.txt deleted file mode 100644 index 223a7d6..0000000 --- a/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -halfspace-optimizer>=0.1.0 -scikit-learn>=1.3.2 -pre-commit>=3.6.0 -pytest>=7.4.4 -pytest-cov>=4.1.0 -tomli>=2.0.1 -tqdm>=4.66.1 From b963abed53fdbe32bf13d22ee2fcc26cc936e454 Mon Sep 17 00:00:00 2001 From: Joshua Ivanhoe Date: Tue, 6 Aug 2024 15:05:00 +0100 Subject: [PATCH 09/10] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0a5e114..14c4cf2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ packages = [ ] [tool.poetry.dependencies] -python = ">=3.10,=<3.12" +python = ">=3.10,<=3.12" halfspace-optimizer = ">=0.1.0" scikit-learn = ">=1.3.2" tqdm = ">=4.66.1" From 00025b0224960ad3991c8900cce7c0c8b2e18788 Mon Sep 17 00:00:00 2001 From: Joshua Ivanhoe Date: Tue, 6 Aug 2024 15:27:43 +0100 Subject: [PATCH 10/10] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 14c4cf2..e607cee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ packages = [ ] [tool.poetry.dependencies] -python = ">=3.10,<=3.12" +python = ">=3.10,<3.12" halfspace-optimizer = ">=0.1.0" scikit-learn = ">=1.3.2" tqdm = ">=4.66.1"