changed docs, added mode skew, updated tests

glevv · Nov 21, 2023 · 6459155 · 6459155
1 parent b5e3916
commit 6459155
Show file tree

Hide file tree

Showing 12 changed files with 225 additions and 236 deletions.
diff --git a/src/obscure_stats/association/association.py b/src/obscure_stats/association/association.py
@@ -267,6 +267,7 @@ def tanimoto_similarity(x: np.ndarray, y: np.ndarray) -> float:
 
     It is very similar to Jaccard or Cosine similarity but differs in how
     dot product is normalized.
+    This version is designed for numeric values, instead of sets.
 
     Parameters
     ----------

diff --git a/src/obscure_stats/central_tendency/central_tendency.py b/src/obscure_stats/central_tendency/central_tendency.py
@@ -14,7 +14,7 @@ def midrange(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose midrange is desired.
+        Input array.
 
     Returns
     -------
@@ -40,7 +40,7 @@ def midhinge(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose midhinge is desired.
+        Input array.
 
     Returns
     -------
@@ -65,7 +65,7 @@ def trimean(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose trimean is desired.
+        Input array.
 
     Returns
     -------
@@ -92,7 +92,7 @@ def contraharmonic_mean(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose contraharmonic mean is desired.
+        Input array.
 
     Returns
     -------
@@ -116,7 +116,7 @@ def midmean(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose interquartile mean is desired.
+        Input array.
 
     Returns
     -------
@@ -164,9 +164,11 @@ def hodges_lehmann_sen_location(x: np.ndarray) -> float:
     This implementation uses cartesian product, so the time and memory complexity
     are N^2. It is best to not use it on large arrays.
     """
-    walsh_sums = np.asarray(x).reshape(-1, 1) + np.asarray(x).reshape(1, -1)
-    mask = np.triu_indices(len(x), 1)  # we need only upper trianle without diagonal
-    return np.nanmedian(walsh_sums[mask]) * 0.5
+    # The original paper suggests using only the upper triangle of the
+    # cartesian product; this implementation uses the whole matrix, which also
+    # includes the diagonal self-pairs, so results may differ for small arrays.
+    product = np.meshgrid(x, x, sparse=True)
+    return np.nanmedian(product[0] + product[1]) * 0.5
 
 
 def standard_trimmed_harrell_davis_quantile(x: np.ndarray, q: float = 0.5) -> float:

diff --git a/src/obscure_stats/dispersion/__init__.py b/src/obscure_stats/dispersion/__init__.py
@@ -5,8 +5,6 @@
     coefficient_of_variation,
     dispersion_ratio,
     efficiency,
-    hoover_index,
-    jains_fairness_index,
     lloyds_index,
     morisita_index,
     quartile_coefficient_of_dispersion,
@@ -20,12 +18,10 @@
     "coefficient_of_variation",
     "dispersion_ratio",
     "efficiency",
-    "hoover_index",
     "lloyds_index",
     "morisita_index",
     "quartile_coefficient_of_dispersion",
     "sqad",
     "studentized_range",
     "robust_coefficient_of_variation",
-    "jains_fairness_index",
 ]
diff --git a/src/obscure_stats/dispersion/dispersion.py b/src/obscure_stats/dispersion/dispersion.py
@@ -190,34 +190,6 @@ def dispersion_ratio(x: np.ndarray) -> float:
     return np.nanmean(x) / (stats.gmean(x, nan_policy="omit") + EPS)
 
 
-def hoover_index(x: np.ndarray) -> float:
-    """Calculate Hoover index.
-
-    It is also known as the Robin Hood index, Schutz index or Pietra ratio.
-
-    Mostly used as measure of income inequality.
-    A value of 0 represents total equality, and 1 represents perfect inequality.
-    In general - measure of uniformity of the distribution.
-
-    Parameters
-    ----------
-    x : array_like
-        Input array.
-
-    Returns
-    -------
-    hi : float or array_like.
-        The value of the Hoover index.
-
-    References
-    ----------
-    Hoover Jr, E. M. (1936).
-    The Measurement of Industrial Localization.
-    Review of Economics and Statistics, 18, No. 162-71.
-    """
-    return 0.5 * np.nansum(x - np.nanmean(x)) / np.nansum(x)
-
-
 def lloyds_index(x: np.ndarray) -> float:
     """Calculate Lloyd's index of mean crowding.
 
@@ -296,35 +268,3 @@ def sqad(x: np.ndarray) -> float:
     med = np.nanmedian(x)
     # constant value to maximize efficiency for normal distribution
     return np.nanquantile(np.abs(x - med), q=0.682689492137086)
-
-
-def jains_fairness_index(x: np.ndarray) -> float:
-    """Calculate Jain's Fairness Index.
-
-    Jain's Fairness Index is a fairness measures commonly used in network engineering.
-    The result ranges from 1/n (worst case) to 1 (best case),
-    and it is maximum when all users receive the same allocation.
-    In general - measure of uniformity of the distribution.
-
-    Parameters
-    ----------
-    x : array_like
-        Input array.
-
-    Returns
-    -------
-    jfi : float or array_like.
-        The value of the coefficient of variation.
-
-    References
-    ----------
-    Jain, R.; Chiu, D. M.; Hawe, W. (1984).
-    A Quantitative Measure of Fairness and Discrimination
-    for Resource Allocation in Shared Computer Systems.
-    DEC Research Report TR-301.
-    """
-    cv = coefficient_of_variation(x)
-    if cv is np.inf:
-        warnings.warn("CV is inf, Jain's Index is not defined.", stacklevel=2)
-        return np.inf
-    return 1.0 / (1.0 + cv**2)
diff --git a/src/obscure_stats/kurtosis/kurtosis.py b/src/obscure_stats/kurtosis/kurtosis.py
@@ -11,7 +11,7 @@ def moors_kurt(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose Moor's kurtosis is desired.
+        Input array.
 
     Returns
     -------
@@ -35,7 +35,7 @@ def moors_octile_kurt(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose Moor's octile kurtosis is desired.
+        Input array.
 
     Returns
     -------
@@ -64,7 +64,7 @@ def hogg_kurt(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose Hogg's kurtosis coefficient is desired.s
+        Input array.
 
     Returns
     -------
@@ -96,7 +96,7 @@ def crow_siddiqui_kurt(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose Crow & Siddiqui kurtosis coefficient is desired.
+        Input array.
 
     Returns
     -------
@@ -122,7 +122,7 @@ def reza_ma_kurt(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose Reza & Ma kurtosis coefficient is desired.
+        Input array.
 
     Returns
     -------

diff --git a/src/obscure_stats/skewness/__init__.py b/src/obscure_stats/skewness/__init__.py
@@ -2,6 +2,7 @@
 
 from .skewness import (
     auc_skew_gamma,
+    bickel_mode_skew,
     bowley_skew,
     forhad_shorna_rank_skew,
     groeneveld_skew,
@@ -26,4 +27,5 @@
     "pearson_median_skew",
     "pearson_mode_skew",
     "pearson_halfmode_skew",
+    "bickel_mode_skew",
 ]
diff --git a/src/obscure_stats/skewness/skewness.py b/src/obscure_stats/skewness/skewness.py
@@ -16,7 +16,7 @@ def pearson_mode_skew(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose Pearson's mode skew coefficient is desired.
+        Input array.
 
     Returns
     -------
@@ -44,7 +44,7 @@ def pearson_halfmode_skew(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose Pearson's mode skew coefficient is desired.
+        Input array.
 
     Returns
     -------
@@ -63,13 +63,36 @@ def pearson_halfmode_skew(x: np.ndarray) -> float:
     return (mean - mode) / std
 
 
+def bickel_mode_skew(x: np.ndarray) -> float:
+    """Calculate Bickel's robust mode skew using the half-sample mode.
+
+    Parameters
+    ----------
+    x : array_like
+        Input array.
+
+    Returns
+    -------
+    bms : float or array_like.
+        The value of Bickel's mode skew coefficient.
+
+    References
+    ----------
+    Bickel, D. R. (2002).
+    Robust estimators of the mode and skewness of continuous data.
+    Computational Statistics & Data Analysis, Elsevier, 39(2), 153-163.
+    """
+    mode = half_sample_mode(x)
+    return np.nanmean(np.sign(x - mode))
+
+
 def pearson_median_skew(x: np.ndarray) -> float:
     """Calculate Pearson's median skew coefficient.
 
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose Pearson's median skew coefficient is desired.
+        Input array.
 
     Returns
     -------
@@ -94,7 +117,7 @@ def medeen_skew(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose Medeen's skewness statistic is desired.
+        Input array.
 
     Returns
     -------
@@ -121,7 +144,7 @@ def bowley_skew(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose Bowley's skewness coefficinet is desired.
+        Input array.
 
     Returns
     -------
@@ -147,7 +170,7 @@ def groeneveld_skew(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose Groeneveld's skewness coefficinet is desired.
+        Input array.
 
     Returns
     -------
@@ -175,7 +198,7 @@ def kelly_skew(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose Kelly's skewness coefficinet is desired.
+        Input array.
 
     Returns
     -------
@@ -200,8 +223,7 @@ def hossain_adnan_skew(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose Houssain and Adnan skewness coefficient
-        is desired.
+        Input array.
 
     Returns
     -------
@@ -226,8 +248,7 @@ def forhad_shorna_rank_skew(x: np.ndarray) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose Forhad-Shorna coefficient of Rank Skewness
-        is desired.
+        Input array.
 
     Returns
     -------
@@ -268,7 +289,7 @@ def auc_skew_gamma(x: np.ndarray, dp: float = 0.01) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose AUC Bowley skewness is desired.
+        Input array.
     dp : float, default = 0.01
         Step used in calculating area under the curve (integrating).
 
@@ -297,7 +318,7 @@ def wauc_skew_gamma(x: np.ndarray, dp: float = 0.01) -> float:
     Parameters
     ----------
     x : array_like
-        Array containing numbers whose AUC Bowley skewness is desired.
+        Input array.
     dp : float, default = 0.01
         Step used in calculating area under the curve (integrating).