From 63dd6d3cbe96b449561708bd4e992e46392e9e36 Mon Sep 17 00:00:00 2001 From: Gleb Levitski <36483986+glevv@users.noreply.github.com> Date: Sat, 23 Nov 2024 18:56:30 +0200 Subject: [PATCH] added grenaders m, lqw, rqw and scmid-trede measure of peakedness --- CITATION.cff | 2 +- pyproject.toml | 4 +- .../central_tendency/__init__.py | 2 + .../central_tendency/central_tendency.py | 50 ++++++++- src/obscure_stats/kurtosis/__init__.py | 6 + src/obscure_stats/kurtosis/kurtosis.py | 104 +++++++++++++++++- tests/test_association.py | 10 +- tests/test_central_tendency.py | 21 +++- tests/test_dispersion.py | 8 +- tests/test_kurtosis.py | 26 ++++- tests/test_skewness.py | 8 +- tests/test_variation.py | 8 +- 12 files changed, 224 insertions(+), 25 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 99e1600..70722fe 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -17,5 +17,5 @@ repository-code: 'https://github.com/glevv/obscure_stats' repository-artifact: 'https://pypi.org/project/obscure_stats' abstract: Collection of lesser-known statistical measures license: MIT -version: 0.3.0 +version: 0.3.1 date-released: '2023-10-21' diff --git a/pyproject.toml b/pyproject.toml index 2cfbb46..4e326b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "obscure_stats" -version = "0.3.0" +version = "0.3.1" description = "Collection of lesser-known statistical functions" authors = ["Hleb Levitski"] readme = "README.md" @@ -31,7 +31,7 @@ scipy = "^1.9.1" mypy = "^1.6.1" pytest = "^8.0.0" pytest-cov = "^5.0.0" -ruff = "^0.5.0" +ruff = "^0.7.0" hypothesis = "^6.103.1" hypothesis-pytest = "^0.19.0" diff --git a/src/obscure_stats/central_tendency/__init__.py b/src/obscure_stats/central_tendency/__init__.py index 2485b90..574b06e 100644 --- a/src/obscure_stats/central_tendency/__init__.py +++ b/src/obscure_stats/central_tendency/__init__.py @@ -2,6 +2,7 @@ from .central_tendency import ( contraharmonic_mean, + grenanders_m, half_sample_mode, 
hodges_lehmann_sen_location, midhinge, @@ -14,6 +15,7 @@ __all__ = [ "contraharmonic_mean", + "grenanders_m", "half_sample_mode", "hodges_lehmann_sen_location", "midhinge", diff --git a/src/obscure_stats/central_tendency/central_tendency.py b/src/obscure_stats/central_tendency/central_tendency.py index 74a1f98..bcc80d6 100644 --- a/src/obscure_stats/central_tendency/central_tendency.py +++ b/src/obscure_stats/central_tendency/central_tendency.py @@ -257,7 +257,7 @@ def half_sample_mode(x: np.ndarray) -> float: y = y[np.isfinite(y)] _corner_cases = (4, 3) # for 4 samples and 3 samples while (ny := len(y)) >= _corner_cases[0]: - half_y = ny // 2 + half_y = math.ceil(ny / 2) w_min = y[-1] - y[0] for i in range(ny - half_y): w = y[i + half_y - 1] - y[i] @@ -308,3 +308,51 @@ def tau_location(x: np.ndarray, c: float = 4.5) -> float: scaled_x = (x - med) / mad w = np.square(1.0 - np.square(scaled_x / c)) * (np.abs(scaled_x) <= c) return np.nansum(x * w) / np.nansum(w) + + +def grenanders_m(x: np.ndarray, p: float = 1.5, k: int = 3) -> float: + """Calculate Grenander's Mode. + + This measure is a direct nonparametric estimation of the mode. + It is not very robust to outliers. + It is recommended to set k > 2p, but it is only possible if x has + enough samples. + + Parameters + ---------- + x : array_like + Input array. + p : float + Smoothing constant. + k : int + The number of samples to exclude from the calculation. + + Returns + ------- + gm : float + The value of Grenander's Mode + + References + ---------- + Grenander, U. (1965). + Some direct estimates of the mode. + Annals of Mathematical Statistics, 36, 131-138. + """ + x_sort = np.sort(x) + x_sort = x_sort[np.isfinite(x_sort)] + + if p <= 1: + msg = "Parameter p should be a float greater than 1." + raise ValueError(msg) + if (k <= 0) or (not isinstance(k, int)): + msg = "Parameter k should be an integer between 1 and length of x." 
+ raise ValueError(msg) + + if len(x_sort) <= k: + return np.nan + + return ( + 0.5 + * np.sum((x_sort[k:] + x_sort[:-k]) / np.power(x_sort[k:] - x_sort[:-k], p)) + / np.sum(np.power(x_sort[k:] - x_sort[:-k], -p)) + ) diff --git a/src/obscure_stats/kurtosis/__init__.py b/src/obscure_stats/kurtosis/__init__.py index a5d0ad6..73c823e 100644 --- a/src/obscure_stats/kurtosis/__init__.py +++ b/src/obscure_stats/kurtosis/__init__.py @@ -4,9 +4,12 @@ crow_siddiqui_kurt, hogg_kurt, l_kurt, + left_quantile_weight, moors_kurt, moors_octile_kurt, reza_ma_kurt, + right_quantile_weight, + schmid_trede_peakedness, staudte_kurt, ) @@ -14,8 +17,11 @@ "crow_siddiqui_kurt", "hogg_kurt", "l_kurt", + "left_quantile_weight", "moors_kurt", "moors_octile_kurt", "reza_ma_kurt", + "right_quantile_weight", + "schmid_trede_peakedness", "staudte_kurt", ] diff --git a/src/obscure_stats/kurtosis/kurtosis.py b/src/obscure_stats/kurtosis/kurtosis.py index 88577eb..55d9b44 100644 --- a/src/obscure_stats/kurtosis/kurtosis.py +++ b/src/obscure_stats/kurtosis/kurtosis.py @@ -87,9 +87,7 @@ def moors_octile_kurt(x: np.ndarray) -> float: A quantile alternative for kurtosis. Journal of the Royal Statistical Society. Series D, 37(1):25-32. """ - o1, o2, o3, o5, o6, o7 = np.nanquantile( - x, [0.125, 0.25, 0.375, 0.625, 0.750, 0.875] - ) + o1, o2, o3, o5, o6, o7 = np.nanquantile(x, [0.125, 0.25, 0.375, 0.625, 0.75, 0.875]) return ((o7 - o5) + (o3 - o1)) / (o6 - o2) @@ -206,3 +204,103 @@ def staudte_kurt(x: np.ndarray) -> float: """ p10, p33, p66, p90 = np.nanquantile(x, [0.1, 1 / 3, 2 / 3, 0.9]) return (p90 - p10) / (p66 - p33) + + +def schmid_trede_peakedness(x: np.ndarray) -> float: + """Calculate Schmid and Trede measure of peakedness P. + + It is based on inter-percentile ranges (uncentered, unscaled) and + tries to compare two different measures of dispersion of the same + sample. + This measure should be more robust than moment based kurtosis. 
+ + Parameters + ---------- + x : array_like + Input array. + + Returns + ------- + stp : float + The value of measure of peakedness P. + + References + ---------- + Schmid, F.; Trede, M. (2003). + Simple tests for peakedness, fat tails and leptokurtosis based on quantiles. + Computational Statistics and Data Analysis, 43, 1-12. + """ + p125, p25, p75, p875 = np.nanquantile(x, [0.125, 0.25, 0.75, 0.875]) + return (p875 - p125) / (p75 - p25) + + +def left_quantile_weight(x: np.ndarray, q: float = 0.25) -> float: + """Calculate left quantile weight (LQW). + + It is based on inter-percentile ranges (uncentered, unscaled) of the + left tail of the distribution. + + Parameters + ---------- + x : array_like + Input array. + q : float + Quantile to use for the anchor. + + Returns + ------- + lqw : float + The value of left quantile weight. + + References + ---------- + Brys, G.; Hubert, M.; Struyf, A. (2006). + Robust measures of tail weight. + Computational Statistics and Data Analysis 50(3), 733-759. + """ + min_q, max_q = 0.0, 0.5 + if q <= min_q or q >= max_q: + msg = "Parameter q should be in range (0, 0.5)." + raise ValueError(msg) + lower_quantile, q025, upper_quantile = np.nanquantile( + x, [q * 0.5, 0.25, (1 - q) * 0.5] + ) + return -(upper_quantile + lower_quantile - 2 * q025) / ( + upper_quantile - lower_quantile + ) + + +def right_quantile_weight(x: np.ndarray, q: float = 0.75) -> float: + """Calculate right quantile weight (RQW). + + It is based on inter-percentile ranges (uncentered, unscaled) of the + right tail of the distribution. + + Parameters + ---------- + x : array_like + Input array. + q : float + Quantile to use for the anchor. + + Returns + ------- + rqw : float + The value of right quantile weight. + + References + ---------- + Brys, G.; Hubert, M.; Struyf, A. (2006). + Robust measures of tail weight. + Computational Statistics and Data Analysis 50(3), 733-759. 
+ """ + min_q, max_q = 0.5, 1.0 + if q <= min_q or q >= max_q: + msg = "Parameter q should be in range (0.5, 1.0)." + raise ValueError(msg) + lower_quantile, q075, upper_quantile = np.nanquantile( + x, [1 - q * 0.5, 0.75, (1 + q) * 0.5] + ) + return (lower_quantile + upper_quantile - 2 * q075) / ( + lower_quantile - upper_quantile + ) diff --git a/tests/test_association.py b/tests/test_association.py index d95b41b..364323e 100644 --- a/tests/test_association.py +++ b/tests/test_association.py @@ -1,5 +1,6 @@ """Collection of tests of association module.""" +import math import typing import numpy as np @@ -7,6 +8,7 @@ from hypothesis import given from hypothesis import strategies as st from hypothesis.extra.numpy import array_shapes, arrays + from obscure_stats.association import ( blomqvist_beta, chatterjee_xi, @@ -118,7 +120,7 @@ def test_const(func: typing.Callable, y_array_float: np.ndarray) -> None: x = np.ones(shape=(len(y_array_float),)) with pytest.warns(match="is constant"): res = func(x, y_array_float) - if res is not np.nan: + if not math.isnan(res): msg = f"Corr coef should be 0 with constant input, got {res}" raise ValueError(msg) @@ -129,7 +131,7 @@ def test_const_after_prep( ) -> None: """Testing for constant input.""" res = func(x_array_float, corr_test_data) - if res is not np.nan: + if not math.isnan(res): msg = f"Corr coef should be 0 with constant input, got {res}" raise ValueError(msg) @@ -167,13 +169,13 @@ def test_notfinite_association( y_array_int: np.ndarray, ) -> None: """Test for correct nan behaviour.""" - if np.isnan(func(x_array_nan, y_array_int)): + if math.isnan(func(x_array_nan, y_array_int)): msg = "Corr coef should support nans." raise ValueError(msg) with pytest.warns(match="too many missing values"): func(x_array_nan[:2], x_array_int[:2]) with pytest.warns(match="contains inf"): - if not np.isnan(func(x_array_int, y_array_inf)): + if not math.isnan(func(x_array_int, y_array_inf)): msg = "Corr coef should support infs." 
raise ValueError(msg) diff --git a/tests/test_central_tendency.py b/tests/test_central_tendency.py index 77e69f1..d53723b 100644 --- a/tests/test_central_tendency.py +++ b/tests/test_central_tendency.py @@ -2,6 +2,7 @@ from __future__ import annotations +import math import typing import numpy as np @@ -9,8 +10,10 @@ from hypothesis import given from hypothesis import strategies as st from hypothesis.extra.numpy import array_shapes, arrays + from obscure_stats.central_tendency import ( contraharmonic_mean, + grenanders_m, half_sample_mode, hodges_lehmann_sen_location, midhinge, @@ -23,6 +26,7 @@ all_functions = [ contraharmonic_mean, + grenanders_m, half_sample_mode, hodges_lehmann_sen_location, midhinge, @@ -67,7 +71,7 @@ def test_edge_cases(x_array_float: np.ndarray) -> None: msg = "Result does not match expected output." raise ValueError(msg) result = standard_trimmed_harrell_davis_quantile(x_array_float[:0]) - if result is not np.nan: + if not math.isnan(result): msg = "Result does not match expected output." raise ValueError(msg) @@ -114,7 +118,7 @@ def test_hsm(hsm_test_data: np.ndarray) -> None: @pytest.mark.parametrize("func", all_functions) def test_statistic_with_nans(func: typing.Callable, x_array_nan: np.ndarray) -> None: """Test for different data types.""" - if np.isnan(func(x_array_nan)): + if math.isnan(func(x_array_nan)): msg = "Statistic should not return nans." 
raise ValueError(msg) @@ -126,6 +130,19 @@ def test_tau_location(x_array_float: np.ndarray, c: float) -> None: tau_location(x_array_float, c=c) +def test_grenaders_m(x_array_float: np.ndarray) -> None: + """Test that function will raise error if p or k parameters are incorrect.""" + with pytest.raises(ValueError, match="Parameter p should be a float"): + grenanders_m(x_array_float, p=1) + with pytest.raises(ValueError, match="Parameter k should be an integer"): + grenanders_m(x_array_float, k=0) + with pytest.raises(ValueError, match="Parameter k should be an integer"): + grenanders_m(x_array_float, k=1.5) # type: ignore[arg-type] + if not math.isnan(grenanders_m(x_array_float, k=len(x_array_float))): + msg = "Statistic should return nans." + raise ValueError(msg) + + @given( arrays( dtype=np.float64, diff --git a/tests/test_dispersion.py b/tests/test_dispersion.py index 9e8ae33..ebce60d 100644 --- a/tests/test_dispersion.py +++ b/tests/test_dispersion.py @@ -1,5 +1,6 @@ """Collection of tests of dispersion module.""" +import math import typing import numpy as np @@ -7,6 +8,7 @@ from hypothesis import given from hypothesis import strategies as st from hypothesis.extra.numpy import array_shapes, arrays + from obscure_stats.dispersion import ( coefficient_of_lvariation, coefficient_of_range, @@ -76,8 +78,8 @@ def test_mock_aggregation_functions( def test_dispersion_sensibility(func: typing.Callable, seed: int) -> None: """Testing for result correctness.""" rng = np.random.default_rng(seed) - low_disp = np.round(rng.exponential(scale=1, size=100) + 1, 2) - high_disp = np.round(rng.exponential(scale=10, size=100) + 1, 2) + low_disp = np.round(rng.exponential(scale=1, size=99) + 1, 2) + high_disp = np.round(rng.exponential(scale=10, size=99) + 1, 2) low_disp_res = func(low_disp) high_disp_res = func(high_disp) if low_disp_res > high_disp_res: @@ -91,7 +93,7 @@ def test_dispersion_sensibility(func: typing.Callable, seed: int) -> None: @pytest.mark.parametrize("func", 
all_functions) def test_statistic_with_nans(func: typing.Callable, x_array_nan: np.ndarray) -> None: """Test for different data types.""" - if np.isnan(func(x_array_nan)): + if math.isnan(func(x_array_nan)): msg = "Statistic should not return nans." raise ValueError(msg) diff --git a/tests/test_kurtosis.py b/tests/test_kurtosis.py index 75d92b1..331209d 100644 --- a/tests/test_kurtosis.py +++ b/tests/test_kurtosis.py @@ -1,5 +1,6 @@ """Collection of tests of kurtosis module.""" +import math import typing import numpy as np @@ -7,13 +8,17 @@ from hypothesis import given from hypothesis import strategies as st from hypothesis.extra.numpy import array_shapes, arrays + from obscure_stats.kurtosis import ( crow_siddiqui_kurt, hogg_kurt, l_kurt, + left_quantile_weight, moors_kurt, moors_octile_kurt, reza_ma_kurt, + right_quantile_weight, + schmid_trede_peakedness, staudte_kurt, ) @@ -21,9 +26,12 @@ crow_siddiqui_kurt, hogg_kurt, l_kurt, + left_quantile_weight, moors_kurt, moors_octile_kurt, reza_ma_kurt, + right_quantile_weight, + schmid_trede_peakedness, staudte_kurt, ] @@ -45,10 +53,14 @@ def test_mock_aggregation_functions( def test_kurt_sensibility(func: typing.Callable, seed: int) -> None: """Testing for result correctness.""" rng = np.random.default_rng(seed) - platy = rng.uniform(size=100) - lepto = rng.laplace(size=100) + platy = rng.uniform(size=99) + lepto = rng.laplace(size=99) platy_res = func(platy) lepto_res = func(lepto) + if func.__name__ == "right_quantile_weight": + # ugly but more harmonized this way + platy_res = -platy_res + lepto_res = -lepto_res if platy_res > lepto_res: msg = ( f"Kurtosis in the first case should be lower, got {platy_res} > {lepto_res}" @@ -59,11 +71,19 @@ def test_kurt_sensibility(func: typing.Callable, seed: int) -> None: @pytest.mark.parametrize("func", all_functions) def test_statistic_with_nans(func: typing.Callable, x_array_nan: np.ndarray) -> None: """Test for different data types.""" - if np.isnan(func(x_array_nan)): + if 
math.isnan(func(x_array_nan)): msg = "Statistic should not return nans." raise ValueError(msg) +@pytest.mark.parametrize("func", [right_quantile_weight, left_quantile_weight]) +@pytest.mark.parametrize("q", [0.0, 1.0]) +def test_q_in_qw(x_array_float: np.ndarray, func: typing.Callable, q: float) -> None: + """Simple test case for correctness of q.""" + with pytest.raises(ValueError, match="Parameter q should be in range"): + func(x_array_float, q=q) + + @given( arrays( dtype=np.float64, diff --git a/tests/test_skewness.py b/tests/test_skewness.py index 662ac21..de64a13 100644 --- a/tests/test_skewness.py +++ b/tests/test_skewness.py @@ -1,5 +1,6 @@ """Collection of tests of skewness module.""" +import math import typing import numpy as np @@ -7,6 +8,7 @@ from hypothesis import given from hypothesis import strategies as st from hypothesis.extra.numpy import array_shapes, arrays + from obscure_stats.skewness import ( auc_skew_gamma, bickel_mode_skew, @@ -57,8 +59,8 @@ def test_skew_sensibility(func: typing.Callable, seed: int) -> None: """Testing for result correctness.""" rng = np.random.default_rng(seed) - no_skew = np.round(rng.normal(size=100), 2) - left_skew = np.round(rng.exponential(size=100) + 1, 2) + no_skew = np.round(rng.normal(size=99), 2) + left_skew = np.round(rng.exponential(size=99) + 1, 2) no_skew_res = func(no_skew) left_skew_res = func(left_skew) if no_skew_res > left_skew_res: @@ -81,7 +83,7 @@ def test_rank_skew(rank_skewness_test_data: np.ndarray) -> None: @pytest.mark.parametrize("func", all_functions) def test_statistic_with_nans(func: typing.Callable, x_array_nan: np.ndarray) -> None: """Test for different data types.""" - if np.isnan(func(x_array_nan)): + if math.isnan(func(x_array_nan)): msg = "Statistic should not return nans." 
raise ValueError(msg) diff --git a/tests/test_variation.py b/tests/test_variation.py index 2da65e9..b6eacbb 100644 --- a/tests/test_variation.py +++ b/tests/test_variation.py @@ -1,5 +1,6 @@ """Collection of tests of variation module.""" +import math import typing import numpy as np @@ -7,6 +8,7 @@ from hypothesis import given from hypothesis import strategies as st from hypothesis.extra.numpy import array_shapes, arrays + from obscure_stats.variation import ( avdev, b_index, @@ -47,8 +49,8 @@ def test_mock_variation_functions( def test_var_sensibility_higher_better(func: typing.Callable, seed: int) -> None: """Testing for result correctness.""" rng = np.random.default_rng(seed) - low_var = rng.choice(["a", "b", "c", "d"], p=[0.25, 0.25, 0.25, 0.25], size=100) - high_var = rng.choice(["a", "b", "c", "d"], p=[0.75, 0.15, 0.05, 0.05], size=100) + low_var = rng.choice(["a", "b", "c", "d"], p=[0.25, 0.25, 0.25, 0.25], size=99) + high_var = rng.choice(["a", "b", "c", "d"], p=[0.75, 0.15, 0.05, 0.05], size=99) low_var_res = func(low_var) high_var_res = func(high_var) if low_var_res < high_var_res: @@ -59,7 +61,7 @@ def test_var_sensibility_higher_better(func: typing.Callable, seed: int) -> None @pytest.mark.parametrize("func", all_functions) def test_statistic_with_nans(func: typing.Callable, c_array_nan: np.ndarray) -> None: """Test for different data types.""" - if np.isnan(func(c_array_nan)): + if math.isnan(func(c_array_nan)): msg = "Statistic should not return nans." raise ValueError(msg)