Switch to 0.3.12 + minor improvements in doc

raphaelvallat · May 27, 2021 · c7a074f · c7a074f
1 parent 9a75423
commit c7a074f
Show file tree

Hide file tree

Showing 8 changed files with 40 additions and 40 deletions.
diff --git a/docs/changelog.rst b/docs/changelog.rst
@@ -3,20 +3,24 @@
 What's new
 ##########
 
-v0.3.12 (dev)
--------------
+v0.3.12 (May 2021)
+------------------
 
 **Bugfixes**
 
 This release fixes a critical error in :py:func:`pingouin.partial_corr`: the number of covariates was not taken into account when calculating the degrees of freedom of the partial correlation, thus leading to incorrect results (except for the correlation coefficient which remained unaffected). For more details, please see `issue 171 <https://github.com/raphaelvallat/pingouin/issues/171>`_.
 
-In addition with fixing the p-values and 95% confidence intervals, the statistical power and Bayes Factor have been removed from the output of a partial correlation, at least temporary until we can make sure that these give exact results.
+In addition to fixing the p-values and 95% confidence intervals, the statistical power and Bayes Factor have been removed from the output of :py:func:`pingouin.partial_corr`, at least temporarily until we can make sure that these give exact results.
 
 We have also fixed a minor bug in the robust skipped and shepherd correlation (see :py:func:`pingouin.corr`), for which the calculation of the confidence intervals and statistical power did not take into account the number of outliers. These are now calculated only on the cleaned data.
 
+.. warning:: We therefore strongly recommend that all users UPDATE Pingouin (:code:`pip install -U pingouin`) and CHECK ANY RESULTS obtained with the :py:func:`pingouin.partial_corr` function.
+
 **Enhancements**
 
 a. Major refactoring of :py:func:`pingouin.plot_blandaltman`, which now has many additional parameters. It also uses a T distribution instead of a normal distribution to estimate the 95% confidence intervals of the mean difference and agreement limits. See `issue 167 <https://github.com/raphaelvallat/pingouin/issues/167>`_.
+b. For clarity, the `z`, `r2` and `adj_r2` have been removed from the output of :py:func:`pingouin.corr` and :py:func:`pingouin.pairwise_corr`, as these can be readily calculated from the correlation coefficient.
+c. Better testing against R for :py:func:`pingouin.partial_corr` and :py:func:`pingouin.corr`.
 
 v0.3.11 (April 2021)
 --------------------

diff --git a/pingouin/__init__.py b/pingouin/__init__.py
@@ -20,7 +20,7 @@
 from .config import *
 
 # Current version
-__version__ = "0.3.11"
+__version__ = "0.3.12"
 
 # Warn if a newer version of Pingouin is available
 from outdated import warn_if_outdated

diff --git a/pingouin/correlation.py b/pingouin/correlation.py
@@ -371,7 +371,7 @@ def corr(x, y, tail='two-sided', method='pearson', **kwargs):
         * ``'shepherd'``: Shepherd's pi correlation (robust)
         * ``'skipped'``: Skipped correlation (robust)
     **kwargs : optional
-        Optional argument(s) passed to the lower-level functions.
+        Optional argument(s) passed to the lower-level correlation functions.
 
     Returns
     -------
@@ -450,8 +450,10 @@ def corr(x, y, tail='two-sided', method='pearson', **kwargs):
     (which requires scikit-learn). Note that these two methods are
     significantly slower than the previous ones.
 
-    .. important:: Please note that rows with missing values (NaN) are
-        automatically removed.
+    The confidence intervals for the correlation coefficient are estimated
+    using the Fisher transformation.
+
+    .. important:: Rows with missing values (NaN) are automatically removed.
 
     References
     ----------
@@ -615,9 +617,9 @@ def partial_corr(data=None, x=None, y=None, covar=None, x_covar=None,
     Parameters
     ----------
     data : :py:class:`pandas.DataFrame`
-        Dataframe. Note that this function can also directly be used as a
-        :py:class:`pandas.DataFrame` method, in which case this argument is
-        no longer needed.
+        Pandas DataFrame. Note that this function can also directly be used
+        as a :py:class:`pandas.DataFrame` method, in which case this argument
+        is no longer needed.
     x, y : string
         x and y. Must be names of columns in ``data``.
     covar : string or list
@@ -626,16 +628,16 @@ def partial_corr(data=None, x=None, y=None, covar=None, x_covar=None,
     x_covar : string or list
         Covariate(s) for the ``x`` variable. This is used to compute
         semi-partial correlation (i.e. the effect of ``x_covar`` is removed
-        from ``x`` but not from ``y``). Note that you cannot specify both
-        ``covar`` and ``x_covar``.
+        from ``x`` but not from ``y``). Only one of ``covar``, ``x_covar`` and
+        ``y_covar`` can be specified.
     y_covar : string or list
         Covariate(s) for the ``y`` variable. This is used to compute
         semi-partial correlation (i.e. the effect of ``y_covar`` is removed
-        from ``y`` but not from ``x``). Note that you cannot specify both
-        ``covar`` and ``y_covar``.
+        from ``y`` but not from ``x``). Only one of ``covar``, ``x_covar`` and
+        ``y_covar`` can be specified.
     tail : string
         Specify whether to return `'one-sided'` or `'two-sided'` p-value.
-        Note that the former are simply half the latter.
+        The former are simply half the latter.
     method : string
         Correlation type:
 
@@ -648,7 +650,7 @@ def partial_corr(data=None, x=None, y=None, covar=None, x_covar=None,
         * ``'shepherd'``: Shepherd's pi correlation (robust)
         * ``'skipped'``: Skipped correlation (robust)
     **kwargs : optional
-        Optional argument(s) passed to the lower-level functions.
+        Optional argument(s) passed to the lower-level correlation functions.
 
     Returns
     -------
@@ -660,6 +662,10 @@ def partial_corr(data=None, x=None, y=None, covar=None, x_covar=None,
         * ``'CI95'``: 95% parametric confidence intervals around :math:`r`
         * ``'p-val'``: tail of the test
 
+    See also
+    --------
+    corr, pairwise_corr, rm_corr
+
     Notes
     -----
     From [1]_:
@@ -672,9 +678,9 @@ def partial_corr(data=None, x=None, y=None, covar=None, x_covar=None,
         two residuals. A semi-partial correlation is computed between one
         residual and another raw (or unresidualized) variable.*
 
-    Note that if you are not interested in calculating the statistics and
-    p-values but only the partial correlation matrix, a (faster)
-    alternative is to use the :py:func:`pingouin.pcorr` method (see example 4).
+    Note that if you are not interested in calculating the p-values [2]_
+    but only the partial correlation matrix, a faster
+    alternative is to use :py:func:`pingouin.pcorr` (see example 4).
 
     Rows with missing values are automatically removed from data. Results have
     been tested against the
@@ -685,6 +691,8 @@ def partial_corr(data=None, x=None, y=None, covar=None, x_covar=None,
     ----------
     .. [1] http://faculty.cas.usf.edu/mbrannick/regression/Partial.html
 
+    .. [2] https://online.stat.psu.edu/stat505/lesson/6/6.3
+
     Examples
     --------
     1. Partial correlation with one covariate
@@ -822,10 +830,9 @@ def partial_corr(data=None, x=None, y=None, covar=None, x_covar=None,
     n_outliers = sum(outliers) if "outliers" in locals() else 0
     n_clean = n - n_outliers
 
-    # Compute the two-sided p-value
-    pval = _correl_pvalue(r, n_clean, k)
-    # Compute the parametric 95% confidence interval
+    # Compute the two-sided p-value and confidence intervals
     # https://online.stat.psu.edu/stat505/lesson/6/6.3
+    pval = _correl_pvalue(r, n_clean, k)
     ci = compute_esci(
         stat=r, nx=(n_clean - k), ny=(n_clean - k), eftype='r', decimals=6)
 
@@ -1109,8 +1116,8 @@ def rm_corr(data=None, x=None, y=None, subject=None, tail='two-sided'):
     Results have been tested against the
     `rmcorr <https://github.com/cran/rmcorr>`_ R package.
 
-    Please note that missing values are automatically removed from the
-    dataframe (listwise deletion).
+    Missing values are automatically removed from the dataframe
+    (listwise deletion).
 
     Examples
     --------

diff --git a/pingouin/nonparametric.py b/pingouin/nonparametric.py
@@ -364,11 +364,6 @@ def wilcoxon(x, y, tail='two-sided'):
     When tail is ``'less'``, the CLES is then set to :math:`1 - \\text{CL}`,
     which gives the proportion of pairs where ``x`` is *lower* than ``y``.
 
-    .. warning :: Versions of Pingouin below 0.2.6 gave wrong two-sided
-        p-values for the Wilcoxon test. P-values were accidentally squared, and
-        therefore smaller. This issue has been resolved in Pingouin>=0.2.6.
-        Make sure to always use the latest release.
-
     References
     ----------
     .. [1] Wilcoxon, F. (1945). Individual comparisons by ranking methods.

diff --git a/pingouin/parametric.py b/pingouin/parametric.py
@@ -881,13 +881,8 @@ def anova(data=None, dv=None, between=None, ss_type=2, detailed=False,
         \\eta_p^2 = \\frac{SS_{\\text{effect}}}{SS_{\\text{effect}} +
         SS_{\\text{error}}}
 
-    Note that missing values are automatically removed. Results have been
-    tested against R, Matlab and JASP.
-
-    .. warning :: Versions of Pingouin below 0.2.5 gave wrong results for
-        **unbalanced N-way ANOVA**. This issue has been resolved in
-        Pingouin>=0.2.5. In such cases, the ANOVA is calculated via an
-        internal call to the statsmodels package.
+    Missing values are automatically removed. Results have been tested against
+    R, Matlab and JASP.
 
     Examples
     --------

diff --git a/pingouin/plotting.py b/pingouin/plotting.py
@@ -284,7 +284,7 @@ def qqplot(x, dist='norm', sparams=(), confidence=.95, figsize=(5, 4),
     location and relative scale of the samples.
 
     .. warning:: Be extra careful when using fancier distributions with several
-        parameters. If you can, always double-check your results with another
+        parameters. Always double-check your results with another
         software or package.
 
     References

diff --git a/pingouin/tests/test_correlation.py b/pingouin/tests/test_correlation.py
@@ -165,8 +165,7 @@ def test_partial_corr(self):
         assert round(pc.at['pearson', 'p-val'], 8) == 0.01496857
 
         # Method == "spearman"
-        # Warning: Spearman slightly different than ppcor package, is this
-        # caused by difference in Python / R when computing ranks?
+        # Warning: Spearman results differ slightly from the ppcor package.
         pc = partial_corr(data=df, x='x', y='y', y_covar=['cv1', 'cv2', 'cv3'],
                           method="spearman")
         # assert round(pc.at['spearman', 'r'], 7) == 0.4597143

diff --git a/setup.py b/setup.py
@@ -15,7 +15,7 @@ def read(fname):
 MAINTAINER_EMAIL = '[email protected]'
 URL = 'https://pingouin-stats.org/index.html'
 DOWNLOAD_URL = 'https://github.com/raphaelvallat/pingouin/'
-VERSION = '0.3.11'
+VERSION = '0.3.12'
 PACKAGE_DATA = {'pingouin.data.icons': ['*.svg']}
 
 INSTALL_REQUIRES = [