Skip to content

Commit

Permalink
changed docs, added mode skew, updated tests
Browse files Browse the repository at this point in the history
  • Loading branch information
glevv committed Nov 21, 2023
1 parent b5e3916 commit 6459155
Show file tree
Hide file tree
Showing 12 changed files with 225 additions and 236 deletions.
1 change: 1 addition & 0 deletions src/obscure_stats/association/association.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ def tanimoto_similarity(x: np.ndarray, y: np.ndarray) -> float:
It is very similar to Jaccard or Cosine similarity but differs in how
dot product is normalized.
This version is designed for numeric values, instead of sets.
Parameters
----------
Expand Down
18 changes: 10 additions & 8 deletions src/obscure_stats/central_tendency/central_tendency.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def midrange(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose midrange is desired.
Input array.
Returns
-------
Expand All @@ -40,7 +40,7 @@ def midhinge(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose midhinge is desired.
Input array.
Returns
-------
Expand All @@ -65,7 +65,7 @@ def trimean(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose trimean is desired.
Input array.
Returns
-------
Expand All @@ -92,7 +92,7 @@ def contraharmonic_mean(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose contraharmonic mean is desired.
Input array.
Returns
-------
Expand All @@ -116,7 +116,7 @@ def midmean(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose interquartile mean is desired.
Input array.
Returns
-------
Expand Down Expand Up @@ -164,9 +164,11 @@ def hodges_lehmann_sen_location(x: np.ndarray) -> float:
This implementation uses cartesian product, so the time and memory complexity
are N^2. It is best to not use it on large arrays.
"""
walsh_sums = np.asarray(x).reshape(-1, 1) + np.asarray(x).reshape(1, -1)
mask = np.triu_indices(len(x), 1) # we need only upper trianle without diagonal
return np.nanmedian(walsh_sums[mask]) * 0.5
# In the original paper the authors suggest using only the upper triangle
# of the Cartesian product, but this implementation uses the
# whole matrix, which is equivalent.
product = np.meshgrid(x, x, sparse=True)
return np.nanmedian(product[0] + product[1]) * 0.5


def standard_trimmed_harrell_davis_quantile(x: np.ndarray, q: float = 0.5) -> float:
Expand Down
4 changes: 0 additions & 4 deletions src/obscure_stats/dispersion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
coefficient_of_variation,
dispersion_ratio,
efficiency,
hoover_index,
jains_fairness_index,
lloyds_index,
morisita_index,
quartile_coefficient_of_dispersion,
Expand All @@ -20,12 +18,10 @@
"coefficient_of_variation",
"dispersion_ratio",
"efficiency",
"hoover_index",
"lloyds_index",
"morisita_index",
"quartile_coefficient_of_dispersion",
"sqad",
"studentized_range",
"robust_coefficient_of_variation",
"jains_fairness_index",
]
60 changes: 0 additions & 60 deletions src/obscure_stats/dispersion/dispersion.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,34 +190,6 @@ def dispersion_ratio(x: np.ndarray) -> float:
return np.nanmean(x) / (stats.gmean(x, nan_policy="omit") + EPS)


def hoover_index(x: np.ndarray) -> float:
    """Calculate Hoover index.

    It is also known as the Robin Hood index, Schutz index or Pietra ratio.
    Mostly used as a measure of income inequality.
    A value of 0 represents total equality, and 1 represents perfect inequality.
    In general - a measure of uniformity of the distribution.

    NaN values are ignored in the mean and in both sums.

    Parameters
    ----------
    x : array_like
        Input array.

    Returns
    -------
    hi : float
        The value of the Hoover index.

    References
    ----------
    Hoover Jr, E. M. (1936).
    The Measurement of Industrial Localization.
    Review of Economics and Statistics, 18, No. 162-71.
    """
    # The index is half of the total *absolute* deviation from the mean,
    # relative to the total. Signed deviations would cancel out to ~0.
    abs_deviation = np.abs(x - np.nanmean(x))
    return 0.5 * np.nansum(abs_deviation) / np.nansum(x)


def lloyds_index(x: np.ndarray) -> float:
"""Calculate Lloyd's index of mean crowding.
Expand Down Expand Up @@ -296,35 +268,3 @@ def sqad(x: np.ndarray) -> float:
med = np.nanmedian(x)
# constant value to maximize efficiency for normal distribution
return np.nanquantile(np.abs(x - med), q=0.682689492137086)


def jains_fairness_index(x: np.ndarray) -> float:
    """Calculate Jain's Fairness Index.

    Jain's Fairness Index is a fairness measure commonly used in network
    engineering.
    The result ranges from 1/n (worst case) to 1 (best case),
    and it is maximum when all users receive the same allocation.
    In general - a measure of uniformity of the distribution.

    Parameters
    ----------
    x : array_like
        Input array.

    Returns
    -------
    jfi : float
        The value of Jain's fairness index, computed as 1 / (1 + CV^2).
        If the coefficient of variation is infinite, a warning is issued
        and ``np.inf`` is returned.

    References
    ----------
    Jain, R.; Chiu, D. M.; Hawe, W. (1984).
    A Quantitative Measure of Fairness and Discrimination
    for Resource Allocation in Shared Computer Systems.
    DEC Research Report TR-301.
    """
    cv = coefficient_of_variation(x)
    # Compare by value: an identity test (`is np.inf`) only matches the
    # np.inf object itself and misses any other infinite float.
    if np.isinf(cv):
        warnings.warn("CV is inf, Jain's Index is not defined.", stacklevel=2)
        return np.inf
    return 1.0 / (1.0 + cv**2)
10 changes: 5 additions & 5 deletions src/obscure_stats/kurtosis/kurtosis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def moors_kurt(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose Moor's kurtosis is desired.
Input array.
Returns
-------
Expand All @@ -35,7 +35,7 @@ def moors_octile_kurt(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose Moor's octile kurtosis is desired.
Input array.
Returns
-------
Expand Down Expand Up @@ -64,7 +64,7 @@ def hogg_kurt(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose Hogg's kurtosis coefficient is desired.s
Input array.
Returns
-------
Expand Down Expand Up @@ -96,7 +96,7 @@ def crow_siddiqui_kurt(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose Crow & Siddiqui kurtosis coefficient is desired.
Input array.
Returns
-------
Expand All @@ -122,7 +122,7 @@ def reza_ma_kurt(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose Reza & Ma kurtosis coefficient is desired.
Input array.
Returns
-------
Expand Down
2 changes: 2 additions & 0 deletions src/obscure_stats/skewness/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from .skewness import (
auc_skew_gamma,
bickel_mode_skew,
bowley_skew,
forhad_shorna_rank_skew,
groeneveld_skew,
Expand All @@ -26,4 +27,5 @@
"pearson_median_skew",
"pearson_mode_skew",
"pearson_halfmode_skew",
"bickel_mode_skew",
]
47 changes: 34 additions & 13 deletions src/obscure_stats/skewness/skewness.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def pearson_mode_skew(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose Pearson's mode skew coefficient is desired.
Input array.
Returns
-------
Expand Down Expand Up @@ -44,7 +44,7 @@ def pearson_halfmode_skew(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose Pearson's mode skew coefficient is desired.
Input array.
Returns
-------
Expand All @@ -63,13 +63,36 @@ def pearson_halfmode_skew(x: np.ndarray) -> float:
return (mean - mode) / std


def bickel_mode_skew(x: np.ndarray) -> float:
    """Calculate Bickel's robust mode skew coefficient.

    The coefficient is the mean sign of the deviations of the observations
    from the half sample mode, so it lies between -1 and 1.
    NaN deviations are ignored when averaging.

    Parameters
    ----------
    x : array_like
        Input array.

    Returns
    -------
    bms : float or array_like.
        The value of Bickel's mode skew coefficient.

    References
    ----------
    Bickel, D. R. (2002).
    Robust estimators of the mode and skewness of continuous data.
    Computational Statistics & Data Analysis, Elsevier, 39(2), 153-163.
    """
    # Each observation contributes -1, 0 or +1 depending on which side
    # of the half sample mode it falls.
    side_of_mode = np.sign(x - half_sample_mode(x))
    return np.nanmean(side_of_mode)


def pearson_median_skew(x: np.ndarray) -> float:
"""Calculate Pearson's median skew coefficient.
Parameters
----------
x : array_like
Array containing numbers whose Pearson's median skew coefficient is desired.
Input array.
Returns
-------
Expand All @@ -94,7 +117,7 @@ def medeen_skew(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose Medeen's skewness statistic is desired.
Input array.
Returns
-------
Expand All @@ -121,7 +144,7 @@ def bowley_skew(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose Bowley's skewness coefficinet is desired.
Input array.
Returns
-------
Expand All @@ -147,7 +170,7 @@ def groeneveld_skew(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose Groeneveld's skewness coefficinet is desired.
Input array.
Returns
-------
Expand Down Expand Up @@ -175,7 +198,7 @@ def kelly_skew(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose Kelly's skewness coefficinet is desired.
Input array.
Returns
-------
Expand All @@ -200,8 +223,7 @@ def hossain_adnan_skew(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose Houssain and Adnan skewness coefficient
is desired.
Input array.
Returns
-------
Expand All @@ -226,8 +248,7 @@ def forhad_shorna_rank_skew(x: np.ndarray) -> float:
Parameters
----------
x : array_like
Array containing numbers whose Forhad-Shorna coefficient of Rank Skewness
is desired.
Input array.
Returns
-------
Expand Down Expand Up @@ -268,7 +289,7 @@ def auc_skew_gamma(x: np.ndarray, dp: float = 0.01) -> float:
Parameters
----------
x : array_like
Array containing numbers whose AUC Bowley skewness is desired.
Input array.
dp : float, default = 0.01
Step used in calculating area under the curve (integrating).
Expand Down Expand Up @@ -297,7 +318,7 @@ def wauc_skew_gamma(x: np.ndarray, dp: float = 0.01) -> float:
Parameters
----------
x : array_like
Array containing numbers whose AUC Bowley skewness is desired.
Input array.
dp : float, default = 0.01
Step used in calculating area under the curve (integrating).
Expand Down
Loading

0 comments on commit 6459155

Please sign in to comment.