From b5e3916af5ad5aff1ae07b62e5ffe420a2da7fb3 Mon Sep 17 00:00:00 2001 From: Gleb Levitski <36483986+glevv@users.noreply.github.com> Date: Sun, 19 Nov 2023 17:40:07 +0200 Subject: [PATCH] added jfi, changed docs, added tests --- LICENSES_bundled | 16 +++--- src/obscure_stats/association/association.py | 42 +++++++------- .../central_tendency/central_tendency.py | 15 +++-- src/obscure_stats/dispersion/__init__.py | 2 + src/obscure_stats/dispersion/dispersion.py | 55 +++++++++++++++---- src/obscure_stats/kurtosis/kurtosis.py | 4 +- src/obscure_stats/skewness/skewness.py | 18 +++--- src/obscure_stats/variation/variation.py | 52 +++++++++--------- tests/test_association.py | 28 +++++++++- tests/test_central_tendency.py | 2 +- tests/test_dispersion.py | 6 +- tests/test_kurtosis.py | 2 +- tests/test_skewness.py | 2 +- tests/test_variation.py | 26 ++------- 14 files changed, 162 insertions(+), 108 deletions(-) diff --git a/LICENSES_bundled b/LICENSES_bundled index d592278..293b233 100644 --- a/LICENSES_bundled +++ b/LICENSES_bundled @@ -4,7 +4,7 @@ NumPy: license: BSD 3-Clause "New" or "Revised" License repository: https://github.com/numpy/numpy homepage: https://numpy.org/ - dependencyLevel: production + dependency: production SciPy name: scipy @@ -12,7 +12,7 @@ SciPy license: BSD 3-Clause "New" or "Revised" License repository: https://github.com/scipy/scipy homepage: https://scipy.org/ - dependencyLevel: production + dependency: production MyPy name: mypy @@ -20,7 +20,7 @@ MyPy license: The MIT License repository: https://github.com/python/mypy homepage: https://www.mypy-lang.org/ - dependencyLevel: development + dependency: dev Ruff name: ruff @@ -28,20 +28,20 @@ Ruff license: The MIT license repository: https://github.com/astral-sh/ruff homepage: https://docs.astral.sh/ruff/ - dependencyLevel: development + dependency: dev -pytest +Pytest name: pytest version: 7.4.3 license: The MIT License repository: https://github.com/pytest-dev/pytest homepage: 
https://docs.pytest.org/en/latest/ - dependencyLevel: development + dependency: dev -pytest-cov +Pytest-cov name: pytest-cov version: 4.1.0 license: The MIT License repository: https://github.com/pytest-dev/pytest-cov homepage: https://pytest-cov.readthedocs.io/en/latest/ - dependencyLevel: development + dependency: dev diff --git a/src/obscure_stats/association/association.py b/src/obscure_stats/association/association.py index da7a4c1..b5ed465 100644 --- a/src/obscure_stats/association/association.py +++ b/src/obscure_stats/association/association.py @@ -9,34 +9,37 @@ def _check_arrays(x: np.ndarray, y: np.ndarray) -> bool: - """Check arrays.""" + """Check arrays. + + - Lenghts of the arrays; + - Constant input; + - Contains inf. + """ if len(x) != len(y): warnings.warn( - "Lenghts of the inputs do not match.", - stacklevel=2, - ) - return True - if all(np.isclose(x, x[0])): - warnings.warn( - "An input array x is constant; the correlation coefficient is not defined.", + "Lenghts of the inputs do not match, please check the arrays.", stacklevel=2, ) return True - if all(np.isclose(y, y[0])): + if all(np.isclose(x, x[0], equal_nan=False)) or all( + np.isclose(y, y[0], equal_nan=False) + ): warnings.warn( - "An input array y is constant; the correlation coefficient is not defined.", + "One of the input arrays is constant;" + " the correlation coefficient is not defined.", stacklevel=2, ) return True - if any(np.isinf(x)): + if any(np.isinf(x)) or any(np.isinf(y)): warnings.warn( - "An input array x contains inf, please check the array.", + "One of the input arrays contains inf, please check the array.", stacklevel=2, ) return True - if any(np.isinf(y)): + if (np.isnan(x).sum() >= len(x) - 1) or (np.isnan(y).sum() >= len(x) - 1): warnings.warn( - "An input array y contains inf, please check the array.", + "One of the input arrays has too many missing values," + " please check the arrays.", stacklevel=2, ) return True @@ -50,9 +53,6 @@ def _prep_arrays(x: np.ndarray, 
y: np.ndarray) -> tuple[np.ndarray, np.ndarray]: _y = np.asarray(y) _x = _x[notnan] _y = _y[notnan] - if len(_x) <= 1 or len(_y) <= 1: - msg = "There are too many missing values in the array." - raise ValueError(msg) return _x, _y @@ -128,7 +128,7 @@ def concordance_corrcoef(x: np.ndarray, y: np.ndarray) -> float: References ---------- - Lawrence I-Kuei Lin (1989). + Lin, L. I. (1989). A concordance correlation coefficient to evaluate reproducibility. Biometrics. 45 (1): 255-268. """ @@ -172,7 +172,7 @@ def concordance_rate( References ---------- - Holmes, Peter (Autumn 2001). + Holmes, P. (2001). Correlation: From Picture to Formula. Teaching Statistics. 23 (3): 67-71. """ @@ -245,7 +245,7 @@ def zhangi(x: np.ndarray, y: np.ndarray) -> float: References ---------- - Zhang, Q., 2023. + Zhang, Q. (2023). On relationships between Chatterjee's and Spearman's correlation coefficients. arXiv preprint arXiv:2302.10131. @@ -282,7 +282,7 @@ def tanimoto_similarity(x: np.ndarray, y: np.ndarray) -> float: References ---------- - Rogers DJ, Tanimoto TT, 1960. + Rogers, D. J.; Tanimoto, T. T. (1960). A Computer Program for Classifying Plants. Science. 132 (3434): 1115-8. """ diff --git a/src/obscure_stats/central_tendency/central_tendency.py b/src/obscure_stats/central_tendency/central_tendency.py index 3539640..604fe78 100644 --- a/src/obscure_stats/central_tendency/central_tendency.py +++ b/src/obscure_stats/central_tendency/central_tendency.py @@ -101,7 +101,7 @@ def contraharmonic_mean(x: np.ndarray) -> float: References ---------- - P. S. Bullen (1987). + Bullen, P. S. (1987). Handbook of means and their inequalities. Springer. """ @@ -125,9 +125,9 @@ def midmean(x: np.ndarray) -> float: References ---------- - Salkind, N. (2008). + Salkind, N. J. (2008). Encyclopedia of Research Design. - SAGE. + SAGE Publications, Inc. 
""" q1, q3 = np.nanquantile(x, [0.25, 0.75]) return np.nanmean(np.where((x >= q1) & (x <= q3), x, np.nan)) @@ -154,6 +154,11 @@ def hodges_lehmann_sen_location(x: np.ndarray) -> float: Estimation of location based on ranks. Annals of Mathematical Statistics. 34 (2): 598-611. + Sen, P. K. (1963). + On the Estimation of Relative Potency in Dilution (-Direct) + Assays by Distribution-Free Methods. + Biometrics 19, no. 4: 532-552. + Notes ----- This implementation uses cartesian product, so the time and memory complexity @@ -185,7 +190,7 @@ def standard_trimmed_harrell_davis_quantile(x: np.ndarray, q: float = 0.5) -> fl References ---------- - Akinshin, A. 2022. + Akinshin, A. (2022). Trimmed Harrell-Davis quantile estimator based on the highest density interval of the given width. Communications in Statistics - Simulation and Computation, pp. 1-11. @@ -230,7 +235,7 @@ def half_sample_mode(x: np.ndarray) -> float: References ---------- - Bickel, D. R., & Frühwirth, R. (2006). + Bickel, D. R.; Frühwirth, R. (2006). On a fast, robust estimator of the mode: Comparisons to other robust estimators with applications. Computational Statistics & Data Analysis, 50(12), 3500-3530. diff --git a/src/obscure_stats/dispersion/__init__.py b/src/obscure_stats/dispersion/__init__.py index c2a95ab..037a39d 100644 --- a/src/obscure_stats/dispersion/__init__.py +++ b/src/obscure_stats/dispersion/__init__.py @@ -6,6 +6,7 @@ dispersion_ratio, efficiency, hoover_index, + jains_fairness_index, lloyds_index, morisita_index, quartile_coefficient_of_dispersion, @@ -26,4 +27,5 @@ "sqad", "studentized_range", "robust_coefficient_of_variation", + "jains_fairness_index", ] diff --git a/src/obscure_stats/dispersion/dispersion.py b/src/obscure_stats/dispersion/dispersion.py index 6015403..48bf421 100644 --- a/src/obscure_stats/dispersion/dispersion.py +++ b/src/obscure_stats/dispersion/dispersion.py @@ -23,7 +23,7 @@ def efficiency(x: np.ndarray) -> float: References ---------- - Grubbs, Frank (1965). 
+ Grubbs, F. E. (1965). Statistical Measures of Accuracy for Riflemen and Missile Engineers. pp. 26-27. """ mean = np.nanmean(x) @@ -73,7 +73,7 @@ def coefficient_of_lvariation(x: np.ndarray) -> float: References ---------- - Hosking, J.R.M. (1990). + Hosking, J. R. M. (1990). L-moments: analysis and estimation of distributions using linear combinations of order statistics. Journal of the Royal Statistical Society, Series B. 52 (1): 105-124. @@ -101,7 +101,7 @@ def coefficient_of_variation(x: np.ndarray) -> float: References ---------- - Brown, C.E. (1998). + Brown, C. E. (1998). Coefficient of Variation. Applied Multivariate Statistics in Geohydrology and Related Sciences. Springer. """ @@ -129,7 +129,7 @@ def robust_coefficient_of_variation(x: np.ndarray) -> float: References ---------- - Reimann, C., Filzmoser, P., Garrett, R.G. and Dutter, R. (2008). + Reimann, C.; Filzmoser, P.; Garrett, R. G.; Dutter, R. (2008). Statistical Data Analysis Explained: Applied Environmental Statistics with R. John Wiley and Sons, New York. """ @@ -182,10 +182,10 @@ def dispersion_ratio(x: np.ndarray) -> float: References ---------- - Soobramoney, J., Chifurira, R., & Zewotir, T. (2022) + Soobramoney, J.; Chifurira, R.; Zewotir, T. (2022). Selecting key features of online behaviour on South African informative websites prior to unsupervised machine learning. - Statistics, Optimization & Information Computing. + Statistics, Optimization & Information Computing, 11(2), 519-530. """ return np.nanmean(x) / (stats.gmean(x, nan_policy="omit") + EPS) @@ -211,7 +211,7 @@ def hoover_index(x: np.ndarray) -> float: References ---------- - Edgar Malone Hoover Jr. (1936). + Hoover Jr, E. M. (1936). The Measurement of Industrial Localization. Review of Economics and Statistics, 18, No. 162-71. """ @@ -236,7 +236,7 @@ def lloyds_index(x: np.ndarray) -> float: References ---------- - Lloyd, M (1967). + Lloyd, M. (1967). Mean crowding. J Anim Ecol. 36 (1): 1-30.
""" @@ -263,7 +263,7 @@ def morisita_index(x: np.ndarray) -> float: References ---------- - Morisita, M (1959). + Morisita, M. (1959). Measuring the dispersion and the analysis of distribution patterns. Memoirs of the Faculty of Science, Kyushu University Series e. Biol. 2: 215-235 """ @@ -275,7 +275,7 @@ def sqad(x: np.ndarray) -> float: """Calculate Standard quantile absolute deviation. This measure is a robust measure of dispersion, that does not need - normalizing constant like MAD. + normalizing constant like MAD and has higher Gaussian efficiency. Parameters ---------- @@ -294,4 +294,37 @@ def sqad(x: np.ndarray) -> float: arXiv preprint arXiv:2208.13459. """ med = np.nanmedian(x) - return np.nanquantile(np.abs(x - med), q=0.682689492137086) # constant + # constant value to maximize efficiency for normal distribution + return np.nanquantile(np.abs(x - med), q=0.682689492137086) + + +def jains_fairness_index(x: np.ndarray) -> float: + """Calculate Jain's Fairness Index. + + Jain's Fairness Index is a fairness measure commonly used in network engineering. + The result ranges from 1/n (worst case) to 1 (best case), + and it is maximum when all users receive the same allocation. + In general, it is a measure of the uniformity of the distribution. + + Parameters + ---------- + x : array_like + Input array. + + Returns + ------- + jfi : float or array_like. + The value of Jain's fairness index. + + References + ---------- + Jain, R.; Chiu, D. M.; Hawe, W. (1984). + A Quantitative Measure of Fairness and Discrimination + for Resource Allocation in Shared Computer Systems. + DEC Research Report TR-301.
+ """ + cv = coefficient_of_variation(x) + if cv is np.inf: + warnings.warn("CV is inf, Jain's Index is not defined.", stacklevel=2) + return np.inf + return 1.0 / (1.0 + cv**2) diff --git a/src/obscure_stats/kurtosis/kurtosis.py b/src/obscure_stats/kurtosis/kurtosis.py index d8ed1fc..de8a989 100644 --- a/src/obscure_stats/kurtosis/kurtosis.py +++ b/src/obscure_stats/kurtosis/kurtosis.py @@ -105,7 +105,7 @@ def crow_siddiqui_kurt(x: np.ndarray) -> float: References ---------- - Crow, E. L. and Siddiqui, M. (1967). + Crow, E. L.; Siddiqui, M. (1967). Robust estimation of location. Journal of the American Statistical Association, 62(318):353-389. """ @@ -131,7 +131,7 @@ def reza_ma_kurt(x: np.ndarray) -> float: References ---------- - Reza, M.S., & Ma, J. (2016). + Reza, M. S.; Ma, J. (2016). ICA and PCA integrated feature extraction for classification. 2016 IEEE 13th International Conference on Signal Processing (ICSP), 1083-1088. """ diff --git a/src/obscure_stats/skewness/skewness.py b/src/obscure_stats/skewness/skewness.py index 1941647..f91c9df 100644 --- a/src/obscure_stats/skewness/skewness.py +++ b/src/obscure_stats/skewness/skewness.py @@ -25,7 +25,7 @@ def pearson_mode_skew(x: np.ndarray) -> float: References ---------- - Pearson, E. S. and Hartley, H. O. (1966). + Pearson, E. S.; Hartley, H. O. (1966). Biometrika Tables for Statisticians, vols. I and II. Cambridge University Press, Cambridge. """ @@ -53,7 +53,7 @@ def pearson_halfmode_skew(x: np.ndarray) -> float: References ---------- - Pearson, E. S. and Hartley, H. O. (1966). + Pearson, E. S.; Hartley, H. O. (1966). Biometrika Tables for Statisticians, vols. I and II. Cambridge University Press, Cambridge. """ @@ -78,7 +78,7 @@ def pearson_median_skew(x: np.ndarray) -> float: References ---------- - Pearson, E.S. and Hartley, H.O. (1966). + Pearson, E. S.; Hartley, H. O. (1966). Biometrika Tables for Statisticians, vols. I and II. Cambridge University Press, Cambridge.
""" @@ -156,7 +156,7 @@ def groeneveld_skew(x: np.ndarray) -> float: References ---------- - Groeneveld, R.A.; Meeden, G. (1984). + Groeneveld, R. A.; Meeden, G. (1984). Measuring Skewness and Kurtosis. The Statistician. 33 (4): 391-399. """ @@ -184,7 +184,7 @@ def kelly_skew(x: np.ndarray) -> float: References ---------- - David, F. N. and Johnson, N. L., (1956). + David, F. N.; Johnson, N. L. (1956). Some tests of significance with ordered variables. J. R. Stat. Soc. Ser. B Stat. Methodol. 18, 1-31. """ @@ -210,7 +210,7 @@ def hossain_adnan_skew(x: np.ndarray) -> float: References ---------- - Hossain, M.F. and Adnan, M.A.S.A (2007). + Hossain, M. F.; Adnan, M. A. S. A. (2007). A New Approach to Determine the Asymmetry of a Distribution. Journal of Applied St atistical Science, Vol.15, pp. 127-134. """ @@ -236,7 +236,7 @@ def forhad_shorna_rank_skew(x: np.ndarray) -> float: References ---------- - Shorna, U. S., & Hossain, M. (2019). + Shorna, U. S.; Hossain, M. (2019). A New Approach to Determine the Coefficient of Skewness and An Alternative Form of Boxplot. arXiv preprint arXiv:1908.06400. @@ -279,7 +279,7 @@ def auc_skew_gamma(x: np.ndarray, dp: float = 0.01) -> float: References ---------- - Arachchige, C. N., & Prendergast, L. A. (2019). + Arachchige, C. N.; Prendergast, L. A. (2019). Mean skewness measures. arXiv preprint arXiv:1912.06996. """ @@ -308,7 +308,7 @@ def wauc_skew_gamma(x: np.ndarray, dp: float = 0.01) -> float: References ---------- - Arachchige, C. N., & Prendergast, L. A. (2019). + Arachchige, C. N.; Prendergast, L. A. (2019). Mean skewness measures. arXiv preprint arXiv:1912.06996.
""" diff --git a/src/obscure_stats/variation/variation.py b/src/obscure_stats/variation/variation.py index 9f2ccf0..32da536 100644 --- a/src/obscure_stats/variation/variation.py +++ b/src/obscure_stats/variation/variation.py @@ -13,8 +13,8 @@ def mod_vr(x: np.ndarray) -> float: This ratio could be interpreted as the probability of category not being the most frequent. - Low values of ModVR correspond to small amount of variation - and high values to larger amounts of variation. + Low values of Mode VR correspond to lower variation and + high values to higher variation. Parameters ---------- @@ -28,7 +28,7 @@ def mod_vr(x: np.ndarray) -> float: References ---------- - Wilcox, Allen R. (June 1973). + Wilcox, A. R. (1973). Indices of Qualitative Variation and Political Measurement. The Western Political Quarterly. 26 (2): 325-343. """ @@ -43,8 +43,8 @@ def range_vr(x: np.ndarray) -> float: Ratio of frequencies of the least and the most common categories. This ratio is similar to range or peak-to-peak for real values. - Low values of RanVR correspond to small amount of variation - and high values to larger amounts of variation. + Low values of Range VR correspond to lower variation and + high values to higher variation. Parameters ---------- @@ -58,7 +58,7 @@ def range_vr(x: np.ndarray) -> float: References ---------- - Wilcox, Allen R. (June 1973). + Wilcox, A. R. (1973). Indices of Qualitative Variation and Political Measurement. The Western Political Quarterly. 26 (2): 325-343. """ @@ -73,8 +73,8 @@ def gibbs_m1(x: np.ndarray) -> float: of samples will belong to the same category (standardized likelihood of a random pair falling in the same category). - Low values of G1 correspond to small amount of variation - and high values to larger amounts of variation. + Low values of Gibbs M1 correspond to lower variation and + high values to higher variation. 
Parameters ---------- @@ -88,8 +88,8 @@ def gibbs_m1(x: np.ndarray) -> float: References ---------- - Gibbs, Jack P., Poston Jr, Dudley L. (March 1975). - The Division of Labor: Conceptualization and Related Measures". + Gibbs, J. P.; Poston Jr, D. L. (1975). + The Division of Labor: Conceptualization and Related Measures. Social Forces, 53 (3): 468-476. See Also @@ -114,8 +114,8 @@ def gibbs_m2(x: np.ndarray) -> float: M2 can be interpreted as the ratio of the variance of the multinomial distribution to the variance of a binomial distribution. - Low values of G2 correspond to small amount of variation - and high values to larger amounts of variation. + Low values of Gibbs M2 correspond to lower variation and + high values to higher variation. Parameters ---------- @@ -129,8 +129,8 @@ def gibbs_m2(x: np.ndarray) -> float: References ---------- - Gibbs, Jack P., Poston Jr, Dudley L. (March 1975). - The Division of Labor: Conceptualization and Related Measures". + Gibbs, J. P.; Poston Jr, D. L. (1975). + The Division of Labor: Conceptualization and Related Measures. Social Forces, 53 (3): 468-476. """ freq = np.asarray(list(Counter(x).values())) / len(x) @@ -143,8 +143,8 @@ def b_index(x: np.ndarray) -> float: Normalized to 0-1 range geometric mean of probabilities of all categories. - Low values of BIn correspond to small amount of variation - and high values to larger amounts of variation. + Low values of B Index correspond to lower variation and + high values to higher variation. Parameters ---------- @@ -158,7 +158,7 @@ def b_index(x: np.ndarray) -> float: References ---------- - Wilcox, Allen R. (June 1973). + Wilcox, A. R. (1973). Indices of Qualitative Variation and Political Measurement. The Western Political Quarterly. 26 (2): 325-343. """ @@ -172,8 +172,8 @@ def ada_index(x: np.ndarray) -> float: Normalized to 0-1 range categorical analog of the mean deviation. 
- Low values of AdaIn correspond to small amount of variation - and high values to larger amounts of variation. + Low values of Ada Index correspond to lower variation and + high values to higher variation. Parameters ---------- @@ -187,7 +187,7 @@ def ada_index(x: np.ndarray) -> float: References ---------- - Wilcox, Allen R. (June 1973). + Wilcox, A. R. (1973). Indices of Qualitative Variation and Political Measurement. The Western Political Quarterly. 26 (2): 325-343. """ @@ -199,12 +199,12 @@ def ada_index(x: np.ndarray) -> float: def extropy(x: np.ndarray) -> float: - """Calculate Information Extropy (bits). + """Calculate Negative Information Extropy (bits). - Measure complementary to entropy. + This measure is complementary to entropy. - Low values of extropy correspond to high amount of variation - and high values to smaller amounts of variation. + Low values of extropy correspond to lower variation and + high values to higher variation. Parameters ---------- @@ -218,10 +218,10 @@ def extropy(x: np.ndarray) -> float: References ---------- - Lad, F., Sanfilippo, G., & Agro, G. (2015). + Lad, F.; Sanfilippo, G.; Agro, G. (2015). Extropy: Complementary dual of entropy. Statistical Science, 30(1), 40-58. """ freq = np.asarray(list(Counter(x).values())) / len(x) p = 1.0 - freq + 1e-7 - return np.sum(p * np.log2(p)) + return -np.sum(p * np.log2(p)) diff --git a/tests/test_association.py b/tests/test_association.py index 2ab45c0..ecee83d 100644 --- a/tests/test_association.py +++ b/tests/test_association.py @@ -74,7 +74,8 @@ def test_unsigned_corr_sensibility( func: typing.Callable, y_array_float: np.ndarray ) -> None: """Testing for result correctness.""" - w = np.r_[2, np.ones(shape=(len(y_array_float) - 1))] + w = np.ones(shape=len(y_array_float)) + w[0] = 2 if func(y_array_float, -y_array_float) < func(y_array_float, w): msg = "Corr coeff higher in the first case." 
raise ValueError(msg) @@ -92,7 +93,7 @@ def test_unsigned_corr_sensibility( ) def test_const(func: typing.Callable, y_array_float: np.ndarray) -> None: """Testing for constant input.""" - x = np.ones(shape=(len(y_array_float))) + x = np.ones(shape=(len(y_array_float),)) with pytest.warns(match="is constant"): if func(x, y_array_float) is not np.nan: msg = "Corr coef should be 0 with constant input." @@ -141,7 +142,30 @@ def test_notfinite_association( if np.isnan(func(x_array_nan, y_array_int)): msg = "Corr coef should support nans." raise ValueError(msg) + with pytest.warns(match="too many missing values"): + func(x_array_nan[:2], x_array_int[:2]) with pytest.warns(match="contains inf"): if not np.isnan(func(x_array_int, y_array_inf)): msg = "Corr coef should support infs." raise ValueError(msg) + + +@pytest.mark.parametrize( + "func", + [ + chatterjeexi, + concordance_corrcoef, + concordance_rate, + symmetric_chatterjeexi, + tanimoto_similarity, + zhangi, + ], +) +def test_unequal_arrays( + func: typing.Callable, + x_array_int: np.ndarray, + y_array_int: np.ndarray, +) -> None: + """Test for unequal arrays.""" + with pytest.warns(match="Lenghts of the inputs do not match"): + func(x_array_int[:4], y_array_int[:3]) diff --git a/tests/test_central_tendency.py b/tests/test_central_tendency.py index 8386f8f..e85f565 100644 --- a/tests/test_central_tendency.py +++ b/tests/test_central_tendency.py @@ -122,5 +122,5 @@ def test_statistic_with_nans( ) -> None: """Test for different data types.""" if np.isnan(func(x_array_nan)): - msg = "Statistics should support nans." + msg = "Statistic should not return nans." 
raise ValueError(msg) diff --git a/tests/test_dispersion.py b/tests/test_dispersion.py index e125c66..15f27e5 100644 --- a/tests/test_dispersion.py +++ b/tests/test_dispersion.py @@ -10,6 +10,7 @@ dispersion_ratio, efficiency, hoover_index, + jains_fairness_index, lloyds_index, morisita_index, quartile_coefficient_of_dispersion, @@ -33,6 +34,7 @@ quartile_coefficient_of_dispersion, sqad, studentized_range, + jains_fairness_index, ], ) @pytest.mark.parametrize( @@ -83,6 +85,7 @@ def test_dispersion_sensibility(func: typing.Callable, seed: int) -> None: robust_coefficient_of_variation, quartile_coefficient_of_dispersion, efficiency, + jains_fairness_index, ], ) def test_cv_corner_cases(func: typing.Callable) -> None: @@ -108,6 +111,7 @@ def test_cv_corner_cases(func: typing.Callable) -> None: quartile_coefficient_of_dispersion, sqad, studentized_range, + jains_fairness_index, ], ) def test_statistic_with_nans( @@ -116,5 +120,5 @@ def test_statistic_with_nans( ) -> None: """Test for different data types.""" if np.isnan(func(x_array_nan)): - msg = "Statistics should support nans." + msg = "Statistic should not return nans." raise ValueError(msg) diff --git a/tests/test_kurtosis.py b/tests/test_kurtosis.py index 195b353..a122c7b 100644 --- a/tests/test_kurtosis.py +++ b/tests/test_kurtosis.py @@ -74,5 +74,5 @@ def test_statistic_with_nans( ) -> None: """Test for different data types.""" if np.isnan(func(x_array_nan)): - msg = "Statistics should support nans." + msg = "Statistic should not return nans." raise ValueError(msg) diff --git a/tests/test_skewness.py b/tests/test_skewness.py index e8c7bfe..5bb759b 100644 --- a/tests/test_skewness.py +++ b/tests/test_skewness.py @@ -216,5 +216,5 @@ def test_statistic_with_nans( ) -> None: """Test for different data types.""" if np.isnan(func(x_array_nan)): - msg = "Statistics should support nans." + msg = "Statistic should not return nans." 
raise ValueError(msg) diff --git a/tests/test_variation.py b/tests/test_variation.py index f05e863..d6ee606 100644 --- a/tests/test_variation.py +++ b/tests/test_variation.py @@ -47,33 +47,19 @@ def test_mock_variation_functions( gibbs_m2, mod_vr, range_vr, - ], -) -@pytest.mark.parametrize("seed", [1, 42, 99]) -def test_var_sensibility_higher_better(func: typing.Callable, seed: int) -> None: - """Testing for result correctness.""" - rng = np.random.default_rng(seed) - low_var = rng.choice(["a", "b", "c", "d"], p=[0.25, 0.25, 0.25, 0.25], size=100) - high_var = rng.choice(["a", "b", "c", "d"], p=[0.75, 0.15, 0.05, 0.05], size=100) - if func(low_var) < func(high_var): - msg = "Variation value in the first case should be higher." - raise ValueError(msg) - - -@pytest.mark.parametrize( - "func", - [ extropy, ], ) @pytest.mark.parametrize("seed", [1, 42, 99]) -def test_var_sensibility_lower_better(func: typing.Callable, seed: int) -> None: +def test_var_sensibility_higher_better(func: typing.Callable, seed: int) -> None: """Testing for result correctness.""" rng = np.random.default_rng(seed) low_var = rng.choice(["a", "b", "c", "d"], p=[0.25, 0.25, 0.25, 0.25], size=100) high_var = rng.choice(["a", "b", "c", "d"], p=[0.75, 0.15, 0.05, 0.05], size=100) - if func(low_var) > func(high_var): - msg = "Variation value in the first case should be lower." + low_res = func(low_var) + high_res = func(high_var) + if low_res < high_res: + msg = f"Statistic value should be higher, got {low_res} < {high_res}" raise ValueError(msg) @@ -95,5 +81,5 @@ def test_statistic_with_nans( ) -> None: """Test for different data types.""" if np.isnan(func(c_array_nan)): - msg = "Statistics should support nans." + msg = "Statistic should not return nans." raise ValueError(msg)