From 33b2c5c81e309e66aafaae9d9276e354364a9e0e Mon Sep 17 00:00:00 2001 From: Gleb Levitski <36483986+glevv@users.noreply.github.com> Date: Thu, 11 Jan 2024 20:24:07 +0200 Subject: [PATCH] added cole index, added fisher index, removed lloyd index --- CITATION.cff | 2 +- LICENSE.txt | 2 +- README.md | 5 +- pyproject.toml | 2 +- src/obscure_stats/dispersion/__init__.py | 10 ++-- src/obscure_stats/dispersion/dispersion.py | 59 ++++++++++++++++------ tests/test_dispersion.py | 15 +++--- 7 files changed, 64 insertions(+), 31 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 45c46a1..e41d0aa 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -18,5 +18,5 @@ repository-code: 'https://github.com/glevv/obscure_stats' repository-artifact: 'https://pypi.org/project/obscure_stats' abstract: Collection of lesser-known statistical measures license: MIT -version: 0.2.0 +version: 0.2.1 date-released: '2023-10-21' \ No newline at end of file diff --git a/LICENSE.txt b/LICENSE.txt index 028f594..855f4d6 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2023 Gleb Levitski +Copyright (c) 2023 Hleb Levitski Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index e3a2c6d..5cca5ed 100644 --- a/README.md +++ b/README.md @@ -24,10 +24,11 @@ - Collection of measures of dispersion - `obscure_stats/dispersion`: * Coefficient of Range; * Coefficient of Variation; + * Cole's Index of Dispersion; * Dispersion Ratio; + * Fisher's Index of Dispersion; * Linear Coefficient of Variation; - * Lloyds Index; - * Morisita Index; + * Morisita Index of Dispersion; * Quartile Coefficient of Dispersion; * Robust Coefficient of Variation; * Shamos Estimator; diff --git a/pyproject.toml b/pyproject.toml index 3a87497..bb9a650 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "obscure_stats" 
-version = "0.2.0" +version = "0.2.1" description = "Collection of lesser-known statistical functions" authors = ["Hleb Levitski"] readme = "README.md" diff --git a/src/obscure_stats/dispersion/__init__.py b/src/obscure_stats/dispersion/__init__.py index 7be4369..4f86b46 100644 --- a/src/obscure_stats/dispersion/__init__.py +++ b/src/obscure_stats/dispersion/__init__.py @@ -4,9 +4,10 @@ coefficient_of_lvariation, coefficient_of_range, coefficient_of_variation, + cole_index_of_dispersion, dispersion_ratio, - lloyds_index, - morisita_index, + fisher_index_of_dispersion, + morisita_index_of_dispersion, quartile_coefficient_of_dispersion, robust_coefficient_of_variation, shamos_estimator, @@ -18,9 +19,10 @@ "coefficient_of_lvariation", "coefficient_of_range", "coefficient_of_variation", + "cole_index_of_dispersion", "dispersion_ratio", - "lloyds_index", - "morisita_index", + "fisher_index_of_dispersion", + "morisita_index_of_dispersion", "quartile_coefficient_of_dispersion", "robust_coefficient_of_variation", "shamos_estimator", diff --git a/src/obscure_stats/dispersion/dispersion.py b/src/obscure_stats/dispersion/dispersion.py index 4dc75aa..d1ac0b1 100644 --- a/src/obscure_stats/dispersion/dispersion.py +++ b/src/obscure_stats/dispersion/dispersion.py @@ -180,11 +180,11 @@ def dispersion_ratio(x: np.ndarray) -> float: return np.nanmean(x) / stats.gmean(_x, nan_policy="omit") -def lloyds_index(x: np.ndarray) -> float: - """Calculate Lloyd's index of mean crowding. +def fisher_index_of_dispersion(x: np.ndarray) -> float: + """Calculate Fisher's index of dispersion. - Lloyd's index of mean crowding (IMC) is the average number of other points - contained in the sample unit that contains a randomly chosen point. + It is very similar to the coefficient of variation but uses unnormalized + variance instead of the standard deviation. 
Parameters ---------- @@ -193,25 +193,27 @@ def lloyds_index(x: np.ndarray) -> float: Returns ------- - li : float - The value of the Lloyd's index. + fi : float + The value of the Fisher's index of dispersion. References ---------- - Lloyd, M. (1967). - Mean crowding. - J Anim Ecol. 36 (1): 1-30. + Fisher, R. A. (1925). + Statistical methods for research workers. + Hafner, New York. """ - m = np.nanmean(x) - s = np.nanvar(x) - return m + s / (m - 1) + mean = np.nanmean(x) + if abs(mean) <= EPS: + warnings.warn("Mean is close to 0. Statistic is undefined.", stacklevel=2) + return np.inf + return (len(x) - 1) * np.nanvar(x) / mean -def morisita_index(x: np.ndarray) -> float: +def morisita_index_of_dispersion(x: np.ndarray) -> float: """Calculate Morisita's index of dispersion. - Morisita's index of dispersion (Im) is the scaled probability - that two points chosen at random from the whole population are in the same sample. + Morisita's index of dispersion is the scaled probability that two + points chosen at random from the whole population are in the same sample. Parameters ---------- @@ -260,7 +262,8 @@ def standard_quantile_absolute_deviation(x: np.ndarray) -> float: # finite sample correction k = 1.0 + 0.762 / n + 0.967 / n**2 # constant value that maximizes efficiency for normal distribution - return k * np.nanquantile(np.abs(x - med), q=0.682689492137086) + q = 0.6826894921370850 # stats.norm.cdf(1) - stats.norm.cdf(-1) + return k * np.nanquantile(np.abs(x - med), q=q) def shamos_estimator(x: np.ndarray) -> float: @@ -325,3 +328,27 @@ def coefficient_of_range(x: np.ndarray) -> float: warnings.warn("Midrange is close to 0. Statistic is undefined.", stacklevel=2) return np.inf return (max_ - min_) / (max_ + min_) + + +def cole_index_of_dispersion(x: np.ndarray) -> float: + """Calculate Cole's index of dispersion. + + Higher values mean higher dispersion. + + Parameters + ---------- + x : array_like + Input array. 
+ + Returns + ------- + ci : float + The value of the Cole's index of dispersion. + + References + ---------- + Cole, L. C. (1946). + A theory for analyzing contagiously distributed populations. + Ecology. 27 (4): 329-341. + """ + return np.nansum(np.square(x)) / np.nansum(x) ** 2 diff --git a/tests/test_dispersion.py b/tests/test_dispersion.py index a6a1c7e..f7ea692 100644 --- a/tests/test_dispersion.py +++ b/tests/test_dispersion.py @@ -8,9 +8,10 @@ coefficient_of_lvariation, coefficient_of_range, coefficient_of_variation, + cole_index_of_dispersion, dispersion_ratio, - lloyds_index, - morisita_index, + fisher_index_of_dispersion, + morisita_index_of_dispersion, quartile_coefficient_of_dispersion, robust_coefficient_of_variation, shamos_estimator, @@ -22,9 +23,10 @@ coefficient_of_lvariation, coefficient_of_range, coefficient_of_variation, + cole_index_of_dispersion, dispersion_ratio, - lloyds_index, - morisita_index, + fisher_index_of_dispersion, + morisita_index_of_dispersion, quartile_coefficient_of_dispersion, robust_coefficient_of_variation, shamos_estimator, @@ -56,10 +58,11 @@ def test_mock_aggregation_functions( [ coefficient_of_lvariation, coefficient_of_variation, + cole_index_of_dispersion, robust_coefficient_of_variation, dispersion_ratio, - lloyds_index, - morisita_index, + fisher_index_of_dispersion, + morisita_index_of_dispersion, quartile_coefficient_of_dispersion, standard_quantile_absolute_deviation, shamos_estimator,