Skip to content

Commit

Permalink
added jfi, changed docs, added tests
Browse files Browse the repository at this point in the history
  • Loading branch information
glevv committed Nov 19, 2023
1 parent 2f295db commit b5e3916
Show file tree
Hide file tree
Showing 14 changed files with 162 additions and 108 deletions.
16 changes: 8 additions & 8 deletions LICENSES_bundled
Original file line number Diff line number Diff line change
Expand Up @@ -4,44 +4,44 @@ NumPy:
license: BSD 3-Clause "New" or "Revised" License
repository: https://github.com/numpy/numpy
homepage: https://numpy.org/
dependencyLevel: production
dependency: production

SciPy
name: scipy
version: 1.11.3
license: BSD 3-Clause "New" or "Revised" License
repository: https://github.com/scipy/scipy
homepage: https://scipy.org/
dependencyLevel: production
dependency: production

MyPy
name: mypy
version: 1.6.1
license: The MIT License
repository: https://github.com/python/mypy
homepage: https://www.mypy-lang.org/
dependencyLevel: development
dependency: dev

Ruff
name: ruff
version: 0.1.5
license: The MIT License
repository: https://github.com/astral-sh/ruff
homepage: https://docs.astral.sh/ruff/
dependencyLevel: development
dependency: dev

pytest
Pytest
name: pytest
version: 7.4.3
license: The MIT License
repository: https://github.com/pytest-dev/pytest
homepage: https://docs.pytest.org/en/latest/
dependencyLevel: development
dependency: dev

pytest-cov
Pytest-cov
name: pytest-cov
version: 4.1.0
license: The MIT License
repository: https://github.com/pytest-dev/pytest-cov
homepage: https://pytest-cov.readthedocs.io/en/latest/
dependencyLevel: development
dependency: dev
42 changes: 21 additions & 21 deletions src/obscure_stats/association/association.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,34 +9,37 @@


def _check_arrays(x: np.ndarray, y: np.ndarray) -> bool:
"""Check arrays."""
"""Check arrays.
- Lengths of the arrays;
- Constant input;
- Contains inf.
"""
if len(x) != len(y):
warnings.warn(
"Lenghts of the inputs do not match.",
stacklevel=2,
)
return True
if all(np.isclose(x, x[0])):
warnings.warn(
"An input array x is constant; the correlation coefficient is not defined.",
"Lenghts of the inputs do not match, please check the arrays.",
stacklevel=2,
)
return True
if all(np.isclose(y, y[0])):
if all(np.isclose(x, x[0], equal_nan=False)) or all(
np.isclose(y, y[0], equal_nan=False)
):
warnings.warn(
"An input array y is constant; the correlation coefficient is not defined.",
"One of the input arrays is constant;"
" the correlation coefficient is not defined.",
stacklevel=2,
)
return True
if any(np.isinf(x)):
if any(np.isinf(x)) or any(np.isinf(y)):
warnings.warn(
"An input array x contains inf, please check the array.",
"One of the input arrays contains inf, please check the array.",
stacklevel=2,
)
return True
if any(np.isinf(y)):
if (np.isnan(x).sum() >= len(x) - 1) or (np.isnan(y).sum() >= len(x) - 1):
warnings.warn(
"An input array y contains inf, please check the array.",
"One of the input arrays has too many missing values,"
" please check the arrays.",
stacklevel=2,
)
return True
Expand All @@ -50,9 +53,6 @@ def _prep_arrays(x: np.ndarray, y: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
_y = np.asarray(y)
_x = _x[notnan]
_y = _y[notnan]
if len(_x) <= 1 or len(_y) <= 1:
msg = "There are too many missing values in the array."
raise ValueError(msg)
return _x, _y


Expand Down Expand Up @@ -128,7 +128,7 @@ def concordance_corrcoef(x: np.ndarray, y: np.ndarray) -> float:
References
----------
Lawrence I-Kuei Lin (1989).
Lin, L. I. (1989).
A concordance correlation coefficient to evaluate reproducibility.
Biometrics. 45 (1): 255-268.
"""
Expand Down Expand Up @@ -172,7 +172,7 @@ def concordance_rate(
References
----------
Holmes, Peter (Autumn 2001).
Holmes, P. (2001).
Correlation: From Picture to Formula.
Teaching Statistics. 23 (3): 67-71.
"""
Expand Down Expand Up @@ -245,7 +245,7 @@ def zhangi(x: np.ndarray, y: np.ndarray) -> float:
References
----------
Zhang, Q., 2023.
Zhang, Q. (2023).
On relationships between Chatterjee's and Spearman's correlation coefficients.
arXiv preprint arXiv:2302.10131.
Expand Down Expand Up @@ -282,7 +282,7 @@ def tanimoto_similarity(x: np.ndarray, y: np.ndarray) -> float:
References
----------
Rogers DJ, Tanimoto TT, 1960.
Rogers, D. J.; Tanimoto, T. T. (1960).
A Computer Program for Classifying Plants.
Science. 132 (3434): 1115-8.
"""
Expand Down
15 changes: 10 additions & 5 deletions src/obscure_stats/central_tendency/central_tendency.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def contraharmonic_mean(x: np.ndarray) -> float:
References
----------
P. S. Bullen (1987).
Bullen, P. S. (1987).
Handbook of means and their inequalities.
Springer.
"""
Expand All @@ -125,9 +125,9 @@ def midmean(x: np.ndarray) -> float:
References
----------
Salkind, N. (2008).
Salkind, N. J. (2008).
Encyclopedia of Research Design.
SAGE.
SAGE Publications, Inc.
"""
q1, q3 = np.nanquantile(x, [0.25, 0.75])
return np.nanmean(np.where((x >= q1) & (x <= q3), x, np.nan))
Expand All @@ -154,6 +154,11 @@ def hodges_lehmann_sen_location(x: np.ndarray) -> float:
Estimation of location based on ranks.
Annals of Mathematical Statistics. 34 (2): 598-611.
Sen, P. K. (1963).
On the Estimation of Relative Potency in Dilution (-Direct)
Assays by Distribution-Free Methods.
Biometrics 19, no. 4: 532-552.
Notes
-----
This implementation uses cartesian product, so the time and memory complexity
Expand Down Expand Up @@ -185,7 +190,7 @@ def standard_trimmed_harrell_davis_quantile(x: np.ndarray, q: float = 0.5) -> fl
References
----------
Akinshin, A. 2022.
Akinshin, A. (2022).
Trimmed Harrell-Davis quantile estimator based on
the highest density interval of the given width.
Communications in Statistics - Simulation and Computation, pp. 1-11.
Expand Down Expand Up @@ -230,7 +235,7 @@ def half_sample_mode(x: np.ndarray) -> float:
References
----------
Bickel, D. R., & Frühwirth, R. (2006).
Bickel, D. R.; Frühwirth, R. (2006).
On a fast, robust estimator of the mode:
Comparisons to other robust estimators with applications.
Computational Statistics & Data Analysis, 50(12), 3500-3530.
Expand Down
2 changes: 2 additions & 0 deletions src/obscure_stats/dispersion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
dispersion_ratio,
efficiency,
hoover_index,
jains_fairness_index,
lloyds_index,
morisita_index,
quartile_coefficient_of_dispersion,
Expand All @@ -26,4 +27,5 @@
"sqad",
"studentized_range",
"robust_coefficient_of_variation",
"jains_fairness_index",
]
55 changes: 44 additions & 11 deletions src/obscure_stats/dispersion/dispersion.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def efficiency(x: np.ndarray) -> float:
References
----------
Grubbs, Frank (1965).
Grubbs, F. E. (1965).
Statistical Measures of Accuracy for Riflemen and Missile Engineers. pp. 26-27.
"""
mean = np.nanmean(x)
Expand Down Expand Up @@ -73,7 +73,7 @@ def coefficient_of_lvariation(x: np.ndarray) -> float:
References
----------
Hosking, J.R.M. (1990).
Hosking, J. R. M. (1990).
L-moments: analysis and estimation of distributions
using linear combinations of order statistics.
Journal of the Royal Statistical Society, Series B. 52 (1): 105-124.
Expand Down Expand Up @@ -101,7 +101,7 @@ def coefficient_of_variation(x: np.ndarray) -> float:
References
----------
Brown, C.E. (1998).
Brown, C. E. (1998).
Coefficient of Variation.
Applied Multivariate Statistics in Geohydrology and Related Sciences. Springer.
"""
Expand Down Expand Up @@ -129,7 +129,7 @@ def robust_coefficient_of_variation(x: np.ndarray) -> float:
References
----------
Reimann, C., Filzmoser, P., Garrett, R.G. and Dutter, R. (2008).
Reimann, C.; Filzmoser, P.; Garrett, R. G.; Dutter, R. (2008).
Statistical Data Analysis Explained: Applied Environmental Statistics with R.
John Wiley and Sons, New York.
"""
Expand Down Expand Up @@ -182,10 +182,10 @@ def dispersion_ratio(x: np.ndarray) -> float:
References
----------
Soobramoney, J., Chifurira, R., & Zewotir, T. (2022)
Soobramoney, J.; Chifurira, R.; Zewotir, T. (2022)
Selecting key features of online behaviour on South African informative websites
prior to unsupervised machine learning.
Statistics, Optimization & Information Computing.
Statistics, Optimization & Information Computing, 11(2), 519-530.
"""
return np.nanmean(x) / (stats.gmean(x, nan_policy="omit") + EPS)

Expand All @@ -211,7 +211,7 @@ def hoover_index(x: np.ndarray) -> float:
References
----------
Edgar Malone Hoover Jr. (1936).
Hoover Jr, E. M. (1936).
The Measurement of Industrial Localization.
Review of Economics and Statistics. 18 (4): 162-171.
"""
Expand All @@ -236,7 +236,7 @@ def lloyds_index(x: np.ndarray) -> float:
References
----------
Lloyd, M (1967).
Lloyd, M. (1967).
Mean crowding.
J Anim Ecol. 36 (1): 1-30.
"""
Expand All @@ -263,7 +263,7 @@ def morisita_index(x: np.ndarray) -> float:
References
----------
Morisita, M (1959).
Morisita, M. (1959).
Measuring the dispersion and the analysis of distribution patterns.
Memoirs of the Faculty of Science, Kyushu University Series e. Biol. 2: 215-235
"""
Expand All @@ -275,7 +275,7 @@ def sqad(x: np.ndarray) -> float:
"""Calculate Standard quantile absolute deviation.
This measure is a robust measure of dispersion, that does not need
normalizing constant like MAD.
normalizing constant like MAD and has higher Gaussian efficiency.
Parameters
----------
Expand All @@ -294,4 +294,37 @@ def sqad(x: np.ndarray) -> float:
arXiv preprint arXiv:2208.13459.
"""
med = np.nanmedian(x)
return np.nanquantile(np.abs(x - med), q=0.682689492137086) # constant
# constant value to maximize efficiency for normal distribution
return np.nanquantile(np.abs(x - med), q=0.682689492137086)


def jains_fairness_index(x: np.ndarray) -> float:
    """Calculate Jain's Fairness Index.

    Jain's Fairness Index is a fairness measure commonly used in network
    engineering. The result ranges from 1/n (worst case) to 1 (best case),
    and it is maximum when all users receive the same allocation.
    In general, it is a measure of uniformity of the distribution.

    Parameters
    ----------
    x : array_like
        Input array.

    Returns
    -------
    jfi : float
        The value of Jain's fairness index, computed as ``1 / (1 + cv**2)``
        where ``cv`` is the coefficient of variation of ``x``.
        If ``cv`` is infinite, a warning is issued and ``np.inf`` is returned.

    References
    ----------
    Jain, R.; Chiu, D. M.; Hawe, W. (1984).
    A Quantitative Measure of Fairness and Discrimination
    for Resource Allocation in Shared Computer Systems.
    DEC Research Report TR-301.
    """
    cv = coefficient_of_variation(x)
    # Compare by value, not identity: ``cv is np.inf`` only matches the
    # ``np.inf`` singleton object and misses an equal infinity stored in any
    # other float object (e.g. a computed ``np.float64``).
    if cv == np.inf:
        warnings.warn("CV is inf, Jain's Index is not defined.", stacklevel=2)
        return np.inf
    return 1.0 / (1.0 + cv**2)
4 changes: 2 additions & 2 deletions src/obscure_stats/kurtosis/kurtosis.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def crow_siddiqui_kurt(x: np.ndarray) -> float:
References
----------
Crow, E. L. and Siddiqui, M. (1967).
Crow, E. L.; Siddiqui, M. (1967).
Robust estimation of location.
Journal of the American Statistical Association, 62(318):353-389.
"""
Expand All @@ -131,7 +131,7 @@ def reza_ma_kurt(x: np.ndarray) -> float:
References
----------
Reza, M.S., & Ma, J. (2016).
Reza, M. S.; Ma, J. (2016).
ICA and PCA integrated feature extraction for classification.
2016 IEEE 13th International Conference on Signal Processing (ICSP), 1083-1088.
"""
Expand Down
Loading

0 comments on commit b5e3916

Please sign in to comment.