Skip to content

Commit

Permalink
added ci pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
glevv committed Nov 25, 2023
1 parent 6459155 commit 4cf8628
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 44 deletions.
68 changes: 68 additions & 0 deletions .github/workflows/package.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Continuous-integration workflow: lint with Ruff, type-check with mypy,
# run the test suite with coverage and upload the report to Codecov.
name: CI

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: true
      matrix:
        python-version: [ "3.11" ]

    steps:
      - name: Check out repository
        uses: actions/checkout@v3

      - name: Set up python
        id: setup-python
        uses: actions/setup-python@v4
        with:
          # Read the version from the matrix so that adding an entry to the
          # matrix really adds a job variant instead of re-running 3.11.
          python-version: ${{ matrix.python-version }}

      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true

      - name: Load cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v3
        with:
          path: .venv
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}

      - name: Install dependencies
        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
        run: poetry install --no-interaction --no-root

      - name: Activate env
        # Every `run` step starts a fresh shell, so `source .venv/bin/activate`
        # would be forgotten before the next step. Exporting through
        # GITHUB_ENV / GITHUB_PATH makes the venv visible to later steps.
        run: |
          echo "VIRTUAL_ENV=$PWD/.venv" >> "$GITHUB_ENV"
          echo "$PWD/.venv/bin" >> "$GITHUB_PATH"

      - uses: chartboost/ruff-action@v1
        name: Ruff
        with:
          # The action runs `ruff <args> <src>` with src defaulting to ".",
          # so the sub-command and its options go here.
          args: check --config pyproject.toml

      - name: Run mypy
        run: |
          mypy .

      - name: Run tests
        # `--cov tests/` would make "tests/" the coverage *source*; give the
        # source explicitly and write an XML report for the Codecov upload.
        run: |
          pytest --cov=src --cov-report=xml tests/

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        with:
          flags: smart-tests
          verbose: true
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
6 changes: 3 additions & 3 deletions src/obscure_stats/association/association.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,9 @@ def concordance_rate(
It could be seen as simplified version of Pearson's R.
It differs from quadrant count ratio by adding and exclusion zone
variation has an option for an exclusion zone.
It is based on the standard error of the mean and will exclude
points that are in the range of mean+-sem.
variation has an option for an exclusion zone. It is based on the
standard error of the mean and will exclude points that are in the
range of mean+-sem.
Parameters
----------
Expand Down
2 changes: 0 additions & 2 deletions src/obscure_stats/dispersion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
coefficient_of_lvariation,
coefficient_of_variation,
dispersion_ratio,
efficiency,
lloyds_index,
morisita_index,
quartile_coefficient_of_dispersion,
Expand All @@ -17,7 +16,6 @@
"coefficient_of_lvariation",
"coefficient_of_variation",
"dispersion_ratio",
"efficiency",
"lloyds_index",
"morisita_index",
"quartile_coefficient_of_dispersion",
Expand Down
55 changes: 22 additions & 33 deletions src/obscure_stats/dispersion/dispersion.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,31 +8,6 @@
EPS = 1e-6


def efficiency(x: np.ndarray) -> float:
    """Compute the efficiency of an array, i.e. its squared coefficient of variation.

    NaN entries are ignored: both the mean and the variance are computed
    with the NaN-aware NumPy reductions.

    Parameters
    ----------
    x : array_like
        Input array.

    Returns
    -------
    eff : float or array_like.
        Variance divided by the squared mean, or ``inf`` (with a warning)
        when the absolute mean does not exceed ``EPS``.

    References
    ----------
    Grubbs, F. E. (1965).
    Statistical Measures of Accuracy for Riflemen and Missile Engineers. pp. 26-27.
    """
    center = np.nanmean(x)
    # A (near-)zero mean makes the ratio meaningless; warn and bail out.
    if abs(center) <= EPS:
        warnings.warn("Mean is close to 0. Statistic is undefined.", stacklevel=2)
        return np.inf
    spread = np.nanvar(x)
    return spread / center**2


def studentized_range(x: np.ndarray) -> float:
"""Calculate range normalized by standard deviation.
Expand All @@ -59,7 +34,10 @@ def studentized_range(x: np.ndarray) -> float:


def coefficient_of_lvariation(x: np.ndarray) -> float:
"""Calculate linear coefficient of variation (MeanAbsDev / Mean).
"""Calculate linear coefficient of variation.
L-CV is the L-scale (half of mean absolute deviation) divided
by L-mean (the same as regular mean).
Parameters
----------
Expand Down Expand Up @@ -87,7 +65,7 @@ def coefficient_of_lvariation(x: np.ndarray) -> float:


def coefficient_of_variation(x: np.ndarray) -> float:
"""Calculate coefficient of variation (Std / Mean).
"""Calculate coefficient of variation (Standard deviation / Mean).
Parameters
----------
Expand Down Expand Up @@ -115,7 +93,8 @@ def coefficient_of_variation(x: np.ndarray) -> float:
def robust_coefficient_of_variation(x: np.ndarray) -> float:
"""Calculate robust coefficient of variation.
It is based on median absolute deviation from the median (MedAbsDev / Median).
It is based on median absolute deviation from the median, i.e. median
absolute deviation from the median divided by the median.
Parameters
----------
Expand Down Expand Up @@ -170,6 +149,11 @@ def quartile_coefficient_of_dispersion(x: np.ndarray) -> float:
def dispersion_ratio(x: np.ndarray) -> float:
"""Calculate dispersion ratio (Mean / GMean).
The closer a dispersion ratio is to 1, the lower the dispersion
between the observations within an array.
In this function the geometric mean is computed by excluding zeros and
missing data points.
Parameters
----------
x : array_like
Expand All @@ -187,7 +171,9 @@ def dispersion_ratio(x: np.ndarray) -> float:
prior to unsupervised machine learning.
Statistics, Optimization & Information Computing, 11(2), 519-530.
"""
return np.nanmean(x) / (stats.gmean(x, nan_policy="omit") + EPS)
_x = np.asarray(x)
_x = np.where(_x == 0, np.nan, _x)
return np.nanmean(x) / stats.gmean(_x, nan_policy="omit")


def lloyds_index(x: np.ndarray) -> float:
Expand Down Expand Up @@ -246,8 +232,8 @@ def morisita_index(x: np.ndarray) -> float:
def sqad(x: np.ndarray) -> float:
"""Calculate Standard quantile absolute deviation.
This measure is a robust measure of dispersion, that does not need
normalizing constant like MAD and has higher gaussian efficiency.
This is a robust measure of dispersion that has higher
Gaussian efficiency than MAD, but a lower breakdown point.
Parameters
----------
Expand All @@ -266,5 +252,8 @@ def sqad(x: np.ndarray) -> float:
arXiv preprint arXiv:2208.13459.
"""
med = np.nanmedian(x)
# constant value to maximize efficiency for normal distribution
return np.nanquantile(np.abs(x - med), q=0.682689492137086)
n = len(x)
# finite sample correction
k = 1.0 + 0.762 / n + 0.967 / n**2
# constant value that maximizes efficiency for normal distribution
return k * np.nanquantile(np.abs(x - med), q=0.682689492137086)
5 changes: 5 additions & 0 deletions src/obscure_stats/kurtosis/kurtosis.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
def moors_kurt(x: np.ndarray) -> float:
"""Calculate Moors' interpretation of kurtosis, based on Z score.
The kurtosis can now be seen as a measure of the dispersion of
squared Z around its expectation.
Alternatively it can be seen to be a measure of the dispersion
of Z around +1 and -1.
Parameters
----------
x : array_like
Expand Down
4 changes: 3 additions & 1 deletion src/obscure_stats/skewness/skewness.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,9 @@ def medeen_skew(x: np.ndarray) -> float:
def bowley_skew(x: np.ndarray) -> float:
"""Calculate Bowley's skewness coefficient.
It is based on quartiles (uncentered, unscaled).
Also known as Yule-Kendall skewness coefficient.
It is based on quartiles (uncentered, unscaled) and compares the distance
between the median and each of the two quartiles.
This measure should be more robust than moment based skewness.
Parameters
Expand Down
5 changes: 0 additions & 5 deletions tests/test_dispersion.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
coefficient_of_lvariation,
coefficient_of_variation,
dispersion_ratio,
efficiency,
lloyds_index,
morisita_index,
quartile_coefficient_of_dispersion,
Expand All @@ -25,7 +24,6 @@
coefficient_of_variation,
robust_coefficient_of_variation,
dispersion_ratio,
efficiency,
lloyds_index,
morisita_index,
quartile_coefficient_of_dispersion,
Expand Down Expand Up @@ -54,7 +52,6 @@ def test_mock_aggregation_functions(
coefficient_of_variation,
robust_coefficient_of_variation,
dispersion_ratio,
efficiency,
lloyds_index,
morisita_index,
quartile_coefficient_of_dispersion,
Expand All @@ -79,7 +76,6 @@ def test_dispersion_sensibility(func: typing.Callable, seed: int) -> None:
coefficient_of_variation,
robust_coefficient_of_variation,
quartile_coefficient_of_dispersion,
efficiency,
],
)
def test_cv_corner_cases(func: typing.Callable) -> None:
Expand All @@ -98,7 +94,6 @@ def test_cv_corner_cases(func: typing.Callable) -> None:
coefficient_of_variation,
robust_coefficient_of_variation,
dispersion_ratio,
efficiency,
lloyds_index,
morisita_index,
quartile_coefficient_of_dispersion,
Expand Down

0 comments on commit 4cf8628

Please sign in to comment.