Skip to content

Commit

Permalink
added l-skew, l-kurt, changed l-cv, added range coefficient, xi corr …
Browse files Browse the repository at this point in the history
…refactoring
  • Loading branch information
glevv committed Dec 16, 2023
1 parent 9f065cd commit b557a4e
Show file tree
Hide file tree
Showing 19 changed files with 229 additions and 75 deletions.
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ repository-code: 'https://github.com/glevv/obscure_stats'
repository-artifact: 'https://pypi.org/project/obscure_stats'
abstract: Collection of lesser-known statistical measures
license: MIT
version: 0.1.6
version: 0.1.7
date-released: '2023-10-21'
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,13 @@
* Groeneveld Skewness Coefficient;
* Hossain-Adnan Skewness Coefficient;
* Kelly Skewness Coefficient;
* L-Skewness;
* Medeen Skewness Coefficient;
* Pearson Median Skewness Coefficient;
* Pearson Mode Skewness Coefficient.
- Collection of measures of kurtosis - `obscure_stats/kurtosis`:
* Crow-Siddiqui Kurtosis;
* L-Kurtosis;
* Hogg Kurtosis;
* Moors Kurtosis;
* Moors Octile Kurtosis;
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "obscure_stats"
version = "0.1.6"
version = "0.1.7"
description = "Collection of lesser-known statistical functions"
authors = ["Gleb Levitski"]
readme = "README.md"
Expand Down
6 changes: 3 additions & 3 deletions src/obscure_stats/association/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
)

__all__ = [
"chatterjeexi",
"concordance_corrcoef",
"concordance_rate",
"chatterjeexi",
"zhangi",
"tanimoto_similarity",
"symmetric_chatterjeexi",
"tanimoto_similarity",
"zhangi",
]
39 changes: 31 additions & 8 deletions src/obscure_stats/association/association.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,15 @@ def chatterjeexi(x: np.ndarray, y: np.ndarray) -> float:
if _check_arrays(x, y):
return np.nan
x, y = _prep_arrays(x, y)
# heavily inspired by https://github.com/czbiohub-sf/xicor/issues/17#issue-965635013
n = len(x)
x_ranked = stats.rankdata(x, method="ordinal")
y_forward_ranked = stats.rankdata(y, method="max")
y_backward_ranked = stats.rankdata(-y, method="max")
y_forward_ranked_ordered = y_forward_ranked[np.argsort(x_ranked)]
nom = np.sum(np.abs(np.diff(y_forward_ranked_ordered)))
denom = np.sum(y_backward_ranked * (n - y_backward_ranked)) / n**3
return 1.0 - nom / (2 * n**2 * denom)
y_forward_ordered = y[np.argsort(x)]
_, y_unique_indexes, y_counts = np.unique(
y_forward_ordered, return_inverse=True, return_counts=True
)
right = np.cumsum(y_counts)[y_unique_indexes]
left = np.cumsum(y_counts[::-1])[len(y_counts) - y_unique_indexes - 1]
return 1.0 - 0.5 * np.sum(np.abs(np.diff(right))) / np.mean(left * (n - left))


def concordance_corrcoef(x: np.ndarray, y: np.ndarray) -> float:
Expand Down Expand Up @@ -231,7 +232,29 @@ def symmetric_chatterjeexi(x: np.ndarray, y: np.ndarray) -> float:
--------
obscure_stats.association.chatterjeexi - Chatterjee Xi coefficient.
"""
return max(chatterjeexi(x, y), chatterjeexi(y, x))
if _check_arrays(x, y):
return np.nan
x, y = _prep_arrays(x, y)
n = len(x)
# y ~ f(x)
y_forward_ordered = y[np.argsort(x)]
_, y_unique_indexes, y_counts = np.unique(
y_forward_ordered, return_inverse=True, return_counts=True
)
right_xy = np.cumsum(y_counts)[y_unique_indexes]
left_xy = np.cumsum(y_counts[::-1])[len(y_counts) - y_unique_indexes - 1]
# x ~ f(y)
x_forward_ordered = x[np.argsort(y)]
_, x_unique_indexes, x_counts = np.unique(
x_forward_ordered, return_inverse=True, return_counts=True
)
right_yx = np.cumsum(x_counts)[x_unique_indexes]
left_yx = np.cumsum(x_counts[::-1])[len(x_counts) - x_unique_indexes - 1]
# choose the highest from the two
return 1.0 - min(
0.5 * np.sum(np.abs(np.diff(right_xy))) / np.mean(left_xy * (n - left_xy)),
0.5 * np.sum(np.abs(np.diff(right_yx))) / np.mean(left_yx * (n - left_yx)),
)


def zhangi(x: np.ndarray, y: np.ndarray) -> float:
Expand Down
6 changes: 3 additions & 3 deletions src/obscure_stats/central_tendency/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@

__all__ = [
"contraharmonic_mean",
"half_sample_mode",
"hodges_lehmann_sen_location",
"midhinge",
"midmean",
"midrange",
"trimean",
"hodges_lehmann_sen_location",
"standard_trimmed_harrell_davis_quantile",
"half_sample_mode",
"trimean",
]
10 changes: 5 additions & 5 deletions src/obscure_stats/central_tendency/central_tendency.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def midrange(x: np.ndarray) -> float:
Returns
-------
mr : float or array_like.
mr : float
The value of the midrange.
References
Expand All @@ -44,7 +44,7 @@ def midhinge(x: np.ndarray) -> float:
Returns
-------
mh : float or array_like.
mh : float
The value of the midhinge.
References
Expand All @@ -69,7 +69,7 @@ def trimean(x: np.ndarray) -> float:
Returns
-------
tm : float or array_like.
tm : float
The value of the trimean.
References
Expand All @@ -96,7 +96,7 @@ def contraharmonic_mean(x: np.ndarray) -> float:
Returns
-------
chm : float or array_like.
chm : float
The value of the contraharmonic mean.
References
Expand All @@ -120,7 +120,7 @@ def midmean(x: np.ndarray) -> float:
Returns
-------
iqm : float or array_like.
iqm : float
The value of the interquartile mean.
References
Expand Down
6 changes: 4 additions & 2 deletions src/obscure_stats/dispersion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from .dispersion import (
coefficient_of_lvariation,
coefficient_of_range,
coefficient_of_variation,
dispersion_ratio,
lloyds_index,
Expand All @@ -15,13 +16,14 @@

__all__ = [
"coefficient_of_lvariation",
"coefficient_of_range",
"coefficient_of_variation",
"dispersion_ratio",
"lloyds_index",
"morisita_index",
"quartile_coefficient_of_dispersion",
"standard_quantile_absolute_deviation",
"studentized_range",
"robust_coefficient_of_variation",
"shamos_estimator",
"standard_quantile_absolute_deviation",
"studentized_range",
]
53 changes: 42 additions & 11 deletions src/obscure_stats/dispersion/dispersion.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import warnings

import numpy as np
from scipy import stats # type: ignore[import-untyped]
from scipy import special, stats # type: ignore[import-untyped]

EPS = 1e-6

Expand All @@ -18,7 +18,7 @@ def studentized_range(x: np.ndarray) -> float:
Returns
-------
sr : float or array_like.
sr : float
The value of the studentized range.
References
Expand Down Expand Up @@ -46,7 +46,7 @@ def coefficient_of_lvariation(x: np.ndarray) -> float:
Returns
-------
lcv : float or array_like.
lcv : float
The value of the linear coefficient of variation.
References
Expand All @@ -60,7 +60,11 @@ def coefficient_of_lvariation(x: np.ndarray) -> float:
if abs(l1) <= EPS:
warnings.warn("Mean is close to 0. Statistic is undefined.", stacklevel=2)
return np.inf
l2 = np.nanmean(np.abs(x - l1)) * 0.5
n = len(x)
_x = np.sort(x)
common = 1 / special.comb(n - 1, 1) / n
beta_1 = common * np.nansum(special.comb(np.arange(1, n), 1) * _x[1:])
l2 = 2 * beta_1 - l1
return l2 / l1


Expand All @@ -74,7 +78,7 @@ def coefficient_of_variation(x: np.ndarray) -> float:
Returns
-------
cv : float or array_like.
cv : float
The value of the coefficient of variation.
References
Expand Down Expand Up @@ -103,7 +107,7 @@ def robust_coefficient_of_variation(x: np.ndarray) -> float:
Returns
-------
rcv : float or array_like.
rcv : float
The value of the robust coefficient of variation.
References
Expand All @@ -130,7 +134,7 @@ def quartile_coefficient_of_dispersion(x: np.ndarray) -> float:
Returns
-------
qcd : float or array_like.
qcd : float
The value of the quartile coefficient of dispersion.
References
Expand Down Expand Up @@ -161,7 +165,7 @@ def dispersion_ratio(x: np.ndarray) -> float:
Returns
-------
dr : float or array_like.
dr : float
The value of the dispersion ratio.
References
Expand Down Expand Up @@ -189,7 +193,7 @@ def lloyds_index(x: np.ndarray) -> float:
Returns
-------
li : float or array_like.
li : float
The value of the Lloyd's index.
References
Expand All @@ -216,7 +220,7 @@ def morisita_index(x: np.ndarray) -> float:
Returns
-------
mi : float or array_like.
mi : float
The value of the Morisita's index.
References
Expand All @@ -242,7 +246,7 @@ def standard_quantile_absolute_deviation(x: np.ndarray) -> float:
Returns
-------
sqad : float or array_like.
sqad : float
The value of the SQAD.
References
Expand Down Expand Up @@ -294,3 +298,30 @@ def shamos_estimator(x: np.ndarray) -> float:
# whole matrix, which is equivalent.
product = np.meshgrid(x, x, sparse=True)
return np.nanmedian(np.abs(product[0] - product[1]))


def coefficient_of_range(x: np.ndarray) -> float:
    """Calculate coefficient of range.

    The statistic is the range divided by the sum of the extremes,
    ``(max - min) / (max + min)``, which equals ``range / (2 * midrange)``.
    NaN values are ignored when looking for the extremes.

    Parameters
    ----------
    x : array_like
        Input array.

    Returns
    -------
    cr : float
        The value of the coefficient of range. ``inf`` is returned (and a
        warning is issued) when ``max + min`` is within ``EPS`` of zero.

    References
    ----------
    Yadav, S. K., Singh, S., & Gupta, R. (2019).
    Measures of Dispersion.
    In Biomedical Statistics (pp. 59-70). Springer, Singapore
    """
    min_ = np.nanmin(x)
    max_ = np.nanmax(x)
    # max + min is twice the midrange; a denominator this close to zero
    # would blow the ratio up, so the statistic is reported as undefined.
    if abs(min_ + max_) <= EPS:
        warnings.warn("Midrange is close to 0. Statistic is undefined.", stacklevel=2)
        return np.inf
    return (max_ - min_) / (max_ + min_)
2 changes: 2 additions & 0 deletions src/obscure_stats/kurtosis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .kurtosis import (
crow_siddiqui_kurt,
hogg_kurt,
l_kurt,
moors_kurt,
moors_octile_kurt,
reza_ma_kurt,
Expand All @@ -14,4 +15,5 @@
"moors_kurt",
"moors_octile_kurt",
"reza_ma_kurt",
"l_kurt",
]
Loading

0 comments on commit b557a4e

Please sign in to comment.