Skip to content

Commit

Permalink
added cole index, added fisher index, removed lloyd index
Browse files Browse the repository at this point in the history
  • Loading branch information
glevv committed Jan 11, 2024
1 parent 5e9ea10 commit 33b2c5c
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 31 deletions.
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ repository-code: 'https://github.com/glevv/obscure_stats'
repository-artifact: 'https://pypi.org/project/obscure_stats'
abstract: Collection of lesser-known statistical measures
license: MIT
version: 0.2.0
version: 0.2.1
date-released: '2023-10-21'
2 changes: 1 addition & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2023 Gleb Levitski
Copyright (c) 2023 Hleb Levitski

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,11 @@
- Collection of measures of dispersion - `obscure_stats/dispersion`:
* Coefficient of Range;
* Coefficient of Variation;
* Cole's Index of Dispersion;
* Dispersion Ratio;
* Fisher's Index of Dispersion;
* Linear Coefficient of Variation;
* Lloyds Index;
* Morisita Index;
* Morisita Index of Dispersion;
* Quartile Coefficient of Dispersion;
* Robust Coefficient of Variation;
* Shamos Estimator;
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "obscure_stats"
version = "0.2.0"
version = "0.2.1"
description = "Collection of lesser-known statistical functions"
authors = ["Hleb Levitski"]
readme = "README.md"
Expand Down
10 changes: 6 additions & 4 deletions src/obscure_stats/dispersion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
coefficient_of_lvariation,
coefficient_of_range,
coefficient_of_variation,
cole_index_of_dispersion,
dispersion_ratio,
lloyds_index,
morisita_index,
fisher_index_of_dispersion,
morisita_index_of_dispersion,
quartile_coefficient_of_dispersion,
robust_coefficient_of_variation,
shamos_estimator,
Expand All @@ -18,9 +19,10 @@
"coefficient_of_lvariation",
"coefficient_of_range",
"coefficient_of_variation",
"cole_index_of_dispersion",
"dispersion_ratio",
"lloyds_index",
"morisita_index",
"fisher_index_of_dispersion",
"morisita_index_of_dispersion",
"quartile_coefficient_of_dispersion",
"robust_coefficient_of_variation",
"shamos_estimator",
Expand Down
59 changes: 43 additions & 16 deletions src/obscure_stats/dispersion/dispersion.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,11 +180,11 @@ def dispersion_ratio(x: np.ndarray) -> float:
return np.nanmean(x) / stats.gmean(_x, nan_policy="omit")


def lloyds_index(x: np.ndarray) -> float:
"""Calculate Lloyd's index of mean crowding.
def fisher_index_of_dispersion(x: np.ndarray) -> float:
"""Calculate Fisher's index of dispersion.
Lloyd's index of mean crowding (IMC) is the average number of other points
contained in the sample unit that contains a randomly chosen point.
It is very similar to the coefficient of variation but uses the unnormalized
variance (the variance scaled by n - 1) instead of the standard deviation.
Parameters
----------
Expand All @@ -193,25 +193,27 @@ def lloyds_index(x: np.ndarray) -> float:
Returns
-------
li : float
The value of the Lloyd's index.
fi : float
The value of Fisher's index of dispersion.
References
----------
Lloyd, M. (1967).
Mean crowding.
J Anim Ecol. 36 (1): 1-30.
Fisher, R. A. (1925).
Statistical methods for research workers.
Hafner, New York.
"""
m = np.nanmean(x)
s = np.nanvar(x)
return m + s / (m - 1)
mean = np.nanmean(x)
if abs(mean) <= EPS:
warnings.warn("Mean is close to 0. Statistic is undefined.", stacklevel=2)
return np.inf
return (len(x) - 1) * np.nanvar(x) / mean


def morisita_index(x: np.ndarray) -> float:
def morisita_index_of_dispersion(x: np.ndarray) -> float:
"""Calculate Morisita's index of dispersion.
Morisita's index of dispersion (Im) is the scaled probability
that two points chosen at random from the whole population are in the same sample.
Morisita's index of dispersion is the scaled probability that two
points chosen at random from the whole population are in the same sample.
Parameters
----------
Expand Down Expand Up @@ -260,7 +262,8 @@ def standard_quantile_absolute_deviation(x: np.ndarray) -> float:
# finite sample correction
k = 1.0 + 0.762 / n + 0.967 / n**2
# constant value that maximizes efficiency for normal distribution
return k * np.nanquantile(np.abs(x - med), q=0.682689492137086)
q = 0.6826894921370850 # stats.norm.cdf(1) - stats.norm.cdf(-1)
return k * np.nanquantile(np.abs(x - med), q=q)


def shamos_estimator(x: np.ndarray) -> float:
Expand Down Expand Up @@ -325,3 +328,27 @@ def coefficient_of_range(x: np.ndarray) -> float:
warnings.warn("Midrange is close to 0. Statistic is undefined.", stacklevel=2)
return np.inf
return (max_ - min_) / (max_ + min_)


def cole_index_of_dispersion(x: np.ndarray) -> float:
    """Calculate Cole's index of dispersion.

    The index is the sum of the squared observations divided by the
    squared sum of the observations. NaN values are ignored.
    Higher values mean higher dispersion.

    Parameters
    ----------
    x : array_like
        Input array.

    Returns
    -------
    ci : float
        The value of Cole's index of dispersion, or ``np.inf`` (after
        emitting a warning) when the sum of the observations is close to 0.

    References
    ----------
    Cole, L. C. (1946).
    A theory for analyzing contagiously distributed populations.
    Ecology. 27 (4): 329-341.
    """
    total = np.nansum(x)
    # Guard the division the same way the other ratio statistics in this
    # module do: warn and return infinity instead of dividing by ~0.
    if abs(total) <= EPS:
        warnings.warn("Sum is close to 0. Statistic is undefined.", stacklevel=2)
        return np.inf
    return np.nansum(np.square(x)) / total**2
15 changes: 9 additions & 6 deletions tests/test_dispersion.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
coefficient_of_lvariation,
coefficient_of_range,
coefficient_of_variation,
cole_index_of_dispersion,
dispersion_ratio,
lloyds_index,
morisita_index,
fisher_index_of_dispersion,
morisita_index_of_dispersion,
quartile_coefficient_of_dispersion,
robust_coefficient_of_variation,
shamos_estimator,
Expand All @@ -22,9 +23,10 @@
coefficient_of_lvariation,
coefficient_of_range,
coefficient_of_variation,
cole_index_of_dispersion,
dispersion_ratio,
lloyds_index,
morisita_index,
fisher_index_of_dispersion,
morisita_index_of_dispersion,
quartile_coefficient_of_dispersion,
robust_coefficient_of_variation,
shamos_estimator,
Expand Down Expand Up @@ -56,10 +58,11 @@ def test_mock_aggregation_functions(
[
coefficient_of_lvariation,
coefficient_of_variation,
cole_index_of_dispersion,
robust_coefficient_of_variation,
dispersion_ratio,
lloyds_index,
morisita_index,
fisher_index_of_dispersion,
morisita_index_of_dispersion,
quartile_coefficient_of_dispersion,
standard_quantile_absolute_deviation,
shamos_estimator,
Expand Down

0 comments on commit 33b2c5c

Please sign in to comment.