feat: Bump min Python version #7

Open · wants to merge 10 commits into `main`
32 changes: 16 additions & 16 deletions .github/workflows/ci.yml
@@ -1,4 +1,4 @@
name: CI
name: CI workflow

on:
push:
@@ -13,27 +13,27 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v3

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}

- name: Install poetry
run: python -m pip install poetry

- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
run: poetry install

- name: Format with black
run: poetry run black ./src

- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
env:
PYTHONPATH: ${{ github.workspace }}/src
run: |
pytest
run: poetry run flake8 --max-line-length 120

- name: Run tests
run: poetry run pytest --cov=src
23 changes: 11 additions & 12 deletions .github/workflows/release.yml
@@ -1,4 +1,4 @@
name: Upload Python Package
name: Publish Python package to PyPI

on:
release:
@@ -14,18 +14,17 @@ jobs:

steps:
- uses: actions/checkout@v3

- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.11'
python-version: "3.11"

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install build
- name: Build package
run: python -m build
- name: Publish package
uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
run: python -m pip install poetry

- name: Configure poetry with PyPI token
run: poetry config pypi-token.pypi ${{ secrets.PYPI_API_TOKEN }}

- name: Build and publish package
run: poetry publish --build
3 changes: 1 addition & 2 deletions README.md
@@ -39,8 +39,7 @@ Clone the repository using `git`:
git clone https://github.com/joshivanhoe/sparsely
```

Create a fresh virtual environment using `venv` or `conda`.
Activate the environment and navigate to the cloned `halfspace` directory.
Create a fresh virtual environment, activate it and navigate to the cloned `sparsely` directory.
Install a locally editable version of the package using `pip`:

```bash
50 changes: 34 additions & 16 deletions pyproject.toml
@@ -1,27 +1,45 @@
[build-system]
requires = ["setuptools >= 61.0"]
build-backend = "setuptools.build_meta"

[project]
[tool.poetry]
name = "sparsely"
version = "1.1.0"
version = "1.1.2"
description = "Scalable sparse linear models in Python"
license = "MIT"
authors = [
{ name="Joshua Ivanhoe", email="[email protected]" },
"Joshua Ivanhoe <[email protected]>"
]
description = "Scalable sparse linear models in Python"
readme = "README.md"
license = {file = "LICENSE"}
requires-python = ">=3.9,<3.12"
repository = "https://github.com/joshivanhoe/sparsely"
documentation = "https://joshivanhoe.github.io/sparsely/"
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
dynamic = ["dependencies"]
packages = [
{ include = "sparsely", from = "src" }
]

[tool.setuptools.dynamic]
dependencies = {file = ["requirements.txt"]}
[tool.poetry.dependencies]
python = ">=3.10,<3.12"
halfspace-optimizer = ">=0.1.0"
scikit-learn = ">=1.3.2"
tqdm = ">=4.66.1"

[project.urls]
Homepage = "https://github.com/joshivanhoe/sparsely"
Issues = "https://github.com/joshivanhoe/sparsely/issues"
[tool.poetry.group.dev.dependencies]
black = "*"
mypy = "*"
flake8 = "*"
pre-commit = "*"
tomli = "*"

[tool.poetry.group.test.dependencies]
pytest = "*"
pytest-cov = "*"

[tool.poetry.group.docs]
optional = true
[tool.poetry.group.docs.dependencies]
sphinx = "*"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
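A quick way to sanity-check the new packaging metadata is to read it back through `importlib.metadata`; this is a minimal sketch, assuming `sparsely` 1.1.2 has been installed into the active environment (e.g. via `poetry install`):

```python
# Read back the metadata declared in pyproject.toml above.
from importlib.metadata import requires, version

print(version("sparsely"))   # expected: "1.1.2"
print(requires("sparsely"))  # runtime dependencies declared under [tool.poetry.dependencies]
```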
7 changes: 0 additions & 7 deletions requirements.txt

This file was deleted.

2 changes: 2 additions & 0 deletions src/sparsely/__init__.py
@@ -2,3 +2,5 @@
from .classifier import SparseLinearClassifier
from .regressor import SparseLinearRegressor
from .tune import tune_estimator

__all__ = ["SparseLinearClassifier", "SparseLinearRegressor", "tune_estimator"]
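With `__all__` defined, the package's public surface is limited to the three names re-exported above; a minimal import sketch, assuming the package is installed:

```python
# Public API as pinned by src/sparsely/__init__.py
from sparsely import SparseLinearClassifier, SparseLinearRegressor, tune_estimator
```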
16 changes: 8 additions & 8 deletions src/sparsely/base.py
@@ -9,7 +9,7 @@
import warnings
from abc import ABC, abstractmethod
from numbers import Real, Integral
from typing import Optional, Callable, ClassVar, Sequence
from typing import Callable, ClassVar, Sequence

import numpy as np
from halfspace import Model
@@ -40,8 +40,8 @@ class BaseSparseEstimator(BaseEstimator, ABC):
features will be selected. If `None`, then the initial guess is randomly selected. Providing a good initial
guess based on problem-specific knowledge can significantly speed up the search.
feature_groups: Set of features that are mutually exclusive. For example, if `feature_groups=[{0, 1}, {2, 3}]`,
then at most one features 0 and 1 will be selected, and at most one features 2 and 3 will be selected. This
can be used to encode prior knowledge about the problem.
then at most one of features 0 and 1 will be selected, and at most one of features 2 and 3 will be
selected. This can be used to encode prior knowledge about the problem.
solver: The solver to use for the optimization problem. The available options are "CBC" and "GUROBI". Support
for the "HiGHS" solver is also planned for a future release.
random_state: Controls the random seed for the initial guess if a user-defined initial guess is not provided.
@@ -63,15 +63,15 @@

def __init__(
self,
k: Optional[int] = None,
gamma: Optional[float] = None,
k: int | None = None,
gamma: float | None = None,
normalize: bool = True,
max_iters: int = 500,
tol: float = 1e-4,
start: Optional[set[int]] = None,
feature_groups: Optional[Sequence[set[int]]] = None,
start: set[int] | None = None,
feature_groups: Sequence[set[int]] | None = None,
solver: str = "CBC",
random_state: Optional[int] = None,
random_state: int | None = None,
verbose: bool = False,
):
"""Model constructor.
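The constructor arguments now use PEP 604 unions (`int | None` instead of `Optional[int]`), which requires Python 3.10+. A construction sketch based on the `BaseSparseEstimator` signature above; treating `SparseLinearRegressor` as a pass-through subclass is an assumption:

```python
from sparsely import SparseLinearRegressor

# Argument names and defaults are taken from the BaseSparseEstimator signature above.
model = SparseLinearRegressor(
    k=5,                      # int | None: number of non-zero coefficients
    feature_groups=[{0, 1}],  # Sequence[set[int]] | None: at most one of features 0 and 1
    solver="CBC",
    random_state=0,
)
```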
9 changes: 4 additions & 5 deletions src/sparsely/tune.py
@@ -1,7 +1,6 @@
"""This module implements a function to tune the sparsity parameter of a linear model using cross-validation."""

from copy import deepcopy
from typing import Optional, Union

import numpy as np
import pandas as pd
@@ -12,21 +11,21 @@
from .classifier import SparseLinearClassifier
from .regressor import SparseLinearRegressor

Estimator = Union[SparseLinearRegressor, SparseLinearClassifier]
Estimator = SparseLinearRegressor | SparseLinearClassifier


def tune_estimator(
X: np.ndarray,
y: np.ndarray,
estimator: Estimator,
k_min: int = 1,
k_max: int = None,
k_max: int | None = None,
step_size: int = 1,
max_iters_no_improvement: Optional[int] = None,
max_iters_no_improvement: int | None = None,
cv: int = 3,
return_search_log: bool = False,
show_progress_bar: bool = False,
) -> Union[Estimator, tuple[Estimator, pd.DataFrame]]:
) -> Estimator | tuple[Estimator, pd.DataFrame]:
"""Tune the sparsity parameter (i.e. number of non-zero coefficients) of a linear model.

The sparsity parameter is tuned by performing a grid search over the range [k_min, k_max] with step size
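The return annotation is now `Estimator | tuple[Estimator, pd.DataFrame]`. A short usage sketch of `tune_estimator` based on the signature above, using an illustrative synthetic dataset:

```python
from sklearn.datasets import make_regression

from sparsely import SparseLinearRegressor, tune_estimator

X, y = make_regression(n_samples=200, n_features=20, n_informative=5, random_state=0)

# With return_search_log=True the annotated return type is tuple[Estimator, pd.DataFrame].
best_estimator, search_log = tune_estimator(
    X,
    y,
    estimator=SparseLinearRegressor(),
    k_min=1,
    k_max=10,
    cv=3,
    return_search_log=True,
)
```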
10 changes: 5 additions & 5 deletions tests/conftest.py
@@ -3,11 +3,11 @@
from sklearn.datasets import make_regression, make_classification
from sklearn.model_selection import train_test_split

Dataset = tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray | None]


@pytest.fixture
def regression_dataset() -> tuple[
np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray
]:
def regression_dataset() -> Dataset:
"""Generate a regression dataset."""
X, y, coef = make_regression(
n_samples=1000,
@@ -21,7 +21,7 @@ def regression_dataset() -> Dataset:


@pytest.fixture
def classification_dataset() -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
def classification_dataset() -> Dataset:
"""Generate a classification dataset."""
X, y = make_classification(
n_samples=1000,
@@ -32,4 +32,4 @@ def classification_dataset() -> tuple[np.ndarray, np.ndarray, np.ndarray, np.nda
random_state=0,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
return X_train, X_test, y_train, y_test
return X_train, X_test, y_train, y_test, None
8 changes: 3 additions & 5 deletions tests/test_classifier.py
@@ -1,11 +1,9 @@
import numpy as np
import pytest
from sklearn.metrics import roc_auc_score, balanced_accuracy_score

from conftest import Dataset
from sparsely import SparseLinearClassifier

Dataset = tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]


# TODO: add tests for sklearn compatibility - requires support for multi-class problems

@@ -24,7 +22,7 @@
def test_sparse_linear_regressor(
classification_dataset: Dataset, estimator: SparseLinearClassifier
):
X_train, X_test, y_train, y_test = classification_dataset
X_train, X_test, y_train, y_test, _ = classification_dataset
estimator.fit(X_train, y_train)
predicted = estimator.predict(X_test)
predicted_proba = estimator.predict_proba(X_test)
@@ -52,6 +50,6 @@ def test_sparse_linear_regressor(
def test_sparse_linear_regressor_invalid_params(
classification_dataset: Dataset, estimator: SparseLinearClassifier
):
X_train, X_test, y_train, y_test = classification_dataset
X_train, X_test, y_train, y_test, _ = classification_dataset
with pytest.raises((ValueError, TypeError)):
estimator.fit(X_train, y_train)
3 changes: 1 addition & 2 deletions tests/test_regressor.py
@@ -2,10 +2,9 @@
import pytest
from sklearn.utils.estimator_checks import check_estimator

from conftest import Dataset
from sparsely import SparseLinearRegressor

Dataset = tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]


def test_sklearn_compatibility():
check_estimator(SparseLinearRegressor())