From a3e2ba6a9fc36ad34c73399b26c40b4642742b42 Mon Sep 17 00:00:00 2001 From: Raphael Vallat Date: Sun, 20 Feb 2022 09:02:48 -0800 Subject: [PATCH] Release v0.5.1 (#236) * Flake8 * Explicit error when y is an empty list in pg.ttest https://github.com/raphaelvallat/pingouin/issues/222 * Add keyword arguments in homoscedasticity function https://github.com/raphaelvallat/pingouin/issues/218 * Bugfix rm_anova and mixed_anova changed the dtypes of categorical columns + added observed=True to all groupby https://github.com/raphaelvallat/pingouin/issues/224 * Update version number in init and setup * Use np.isclose for test_pearson == 1 https://github.com/raphaelvallat/pingouin/issues/195 * Coverage for try..except scipy fallback * Fix set_option for pandas 1.4 * Upgraded dependencies for seaborn and statsmodels * Added Jarque-Bera test in pg.normality https://github.com/raphaelvallat/pingouin/issues/216 * Coverage scipy import error * Use pd.concat instead of frame.append to avoid FutureWarning * Remove add_categories(inplace=True) to avoid FutureWarning * GH Discussions instead of Gitter * Minor doc fix --- README.rst | 7 +-- docs/changelog.rst | 19 ++++-- docs/index.rst | 5 +- pingouin/__init__.py | 2 +- pingouin/distribution.py | 52 +++++++++------ pingouin/pairwise.py | 27 +++----- pingouin/parametric.py | 98 +++++++++++------------------ pingouin/regression.py | 6 +- pingouin/tests/test_correlation.py | 5 +- pingouin/tests/test_distribution.py | 6 +- pingouin/tests/test_pairwise.py | 2 +- pingouin/tests/test_parametric.py | 11 ++-- pingouin/tests/test_utils.py | 6 ++ pingouin/utils.py | 1 + requirements.txt | 4 +- setup.py | 6 +- 16 files changed, 121 insertions(+), 136 deletions(-) diff --git a/README.rst b/README.rst index 1c3c2b13..75a54395 100644 --- a/README.rst +++ b/README.rst @@ -23,8 +23,6 @@ .. image:: http://joss.theoj.org/papers/d2254e6d8e8478da192148e4cfbe4244/status.svg :target: http://joss.theoj.org/papers/d2254e6d8e8478da192148e4cfbe4244 -.. image:: https://badges.gitter.im/owner/repo.png - :target: https://gitter.im/pingouin-stats/Lobby ---------------- @@ -70,10 +68,7 @@ Documentation Chat ==== -If you have questions, please ask them in the public `Gitter chat `_ - -.. image:: https://badges.gitter.im/owner/repo.png - :target: https://gitter.im/pingouin-stats/Lobby +If you have questions, please ask them in `GitHub Discussions `_. Installation ============ diff --git a/docs/changelog.rst b/docs/changelog.rst index 04a7fd47..015fe374 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -8,13 +8,24 @@ What's new ************* -v0.6.0.dev ----------- +v0.5.1 (February 2022) +---------------------- + +This is a minor release, with several bugfixes and improvements. This release is compatible with SciPy 1.8 and Pandas 1.4. + +**Bugfixes** + +a. Added support for SciPy 1.8 and Pandas 1.4. `PR 234 `_. +b. Fixed bug where :py:func:`pingouin.rm_anova` and :py:func:`pingouin.mixed_anova` changed the dtypes of categorical columns in-place (`issue 224 `_). **Enhancements** -a. Faster implementation of :py:func:`pingouin.gzscore`, adding all options available in zscore: axis, ddof and nan_policy. Warning: this functions is deprecated and will be removed in pingouin 0.7.0 (use scipy.stats.gzscore instead). See `pull request 210 `_. -b. Replace use of statsmodels' studentized range distribution functions with more SciPy's more accurate `scipy.stats.studentized_range`. See `pull request 229 `_. +a. 
Faster implementation of :py:func:`pingouin.gzscore`, adding all options available in zscore: axis, ddof and nan_policy. Warning: this functions is deprecated and will be removed in pingouin 0.7.0 (use :py:func:`scipy.stats.gzscore` instead). `PR 210 `_. +b. Replace use of statsmodels' studentized range distribution functions with more SciPy's more accurate :py:func:`scipy.stats.studentized_range`. `PR 229 `_. +c. Add support for optional keywords argument in the :py:func:`pingouin.homoscedasticity` function (`issue 218 `_). +d. Add support for the Jarque-Bera test in :py:func:`pingouin.normality` (`issue 216 `_). + +Lastly, we have also deprecated the Gitter forum in favor of `GitHub Discussions `_. Please use Discussions to ask questions, share ideas / tips and engage with the Pingouin community! ************* diff --git a/docs/index.rst b/docs/index.rst index 927ee5b7..663c8bdc 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -21,9 +21,6 @@ .. image:: http://joss.theoj.org/papers/d2254e6d8e8478da192148e4cfbe4244/status.svg :target: http://joss.theoj.org/papers/d2254e6d8e8478da192148e4cfbe4244 -.. image:: https://badges.gitter.im/owner/repo.png - :target: https://gitter.im/pingouin-stats/Lobby - ---------------- @@ -108,7 +105,7 @@ Whenever a new release is out there, you can upgrade your version by typing the Quick start =========== -* If you have *questions*, please ask them in the public `Gitter chat `_. +* If you have *questions*, please ask them in `GitHub Discussions `_. * If you want to *report a bug*, please open an issue on the `GitHub repository `_. diff --git a/pingouin/__init__.py b/pingouin/__init__.py index cff5759c..bcb123b9 100644 --- a/pingouin/__init__.py +++ b/pingouin/__init__.py @@ -20,7 +20,7 @@ from .config import * # Current version -__version__ = "0.5.0" +__version__ = "0.5.1" # Warn if a newer version of Pingouin is available from outdated import warn_if_outdated diff --git a/pingouin/distribution.py b/pingouin/distribution.py index 80a5d2ea..3fead4a4 100644 --- a/pingouin/distribution.py +++ b/pingouin/distribution.py @@ -86,9 +86,11 @@ def normality(data, dv=None, group=None, method="shapiro", alpha=.05): Grouping variable (only when ``data`` is a long-format dataframe). method : str Normality test. `'shapiro'` (default) performs the Shapiro-Wilk test - using :py:func:`scipy.stats.shapiro`, and `'normaltest'` performs the - omnibus test of normality using :py:func:`scipy.stats.normaltest`. - The latter is more appropriate for large samples. + using :py:func:`scipy.stats.shapiro`, `'normaltest'` performs the + omnibus test of normality using :py:func:`scipy.stats.normaltest`, `'jarque_bera'` performs + the Jarque-Bera test using :py:func:`scipy.stats.jarque_bera`. + The Omnibus and Jarque-Bera tests are more suitable than the Shapiro test for + large samples. alpha : float Significance level. @@ -194,9 +196,16 @@ def normality(data, dv=None, group=None, method="shapiro", alpha=.05): W pval normal Pre 0.967718 0.478773 True Post 0.940728 0.095157 True + + 5. 
Same but using the Jarque-Bera test + + >>> pg.normality(data, dv='Performance', group='Time', method="jarque_bera") + W pval normal + Pre 0.304021 0.858979 True + Post 1.265656 0.531088 True """ assert isinstance(data, (pd.DataFrame, pd.Series, list, np.ndarray)) - assert method in ['shapiro', 'normaltest'] + assert method in ['shapiro', 'normaltest', 'jarque_bera'] if isinstance(data, pd.Series): data = data.to_frame() col_names = ['W', 'pval'] @@ -227,14 +236,13 @@ def normality(data, dv=None, group=None, method="shapiro", alpha=.05): grp = data.groupby(group, observed=True, sort=False) cols = grp.groups.keys() for _, tmp in grp: - stats = stats.append(normality(tmp[dv].to_numpy(), - method=method, - alpha=alpha)) + st_grp = normality(tmp[dv].to_numpy(), method=method, alpha=alpha) + stats = pd.concat([stats, st_grp], axis=0, ignore_index=True) stats.index = cols return _postprocess_dataframe(stats) -def homoscedasticity(data, dv=None, group=None, method="levene", alpha=.05): +def homoscedasticity(data, dv=None, group=None, method="levene", alpha=.05, **kwargs): """Test equality of variance. Parameters @@ -253,6 +261,8 @@ def homoscedasticity(data, dv=None, group=None, method="levene", alpha=.05): The former is more robust to departure from normality. alpha : float Significance level. + **kwargs : optional + Optional argument(s) passed to the lower-level :py:func:`scipy.stats.levene` function. Returns ------- @@ -339,7 +349,13 @@ def homoscedasticity(data, dv=None, group=None, method="levene", alpha=.05): W pval equal_var levene 1.173518 0.310707 True - 3. Bartlett test using a list of iterables + 3. Same but using a mean center + + >>> pg.homoscedasticity(data_long, dv="value", group="variable", center="mean") + W pval equal_var + levene 1.572239 0.209303 True + + 4. Bartlett test using a list of iterables >>> data = [[4, 8, 9, 20, 14], np.array([5, 8, 15, 45, 12])] >>> pg.homoscedasticity(data, method="bartlett", alpha=.05) @@ -356,30 +372,28 @@ def homoscedasticity(data, dv=None, group=None, method="levene", alpha=.05): # Get numeric data only numdata = data._get_numeric_data() assert numdata.shape[1] > 1, 'Data must have at least two columns.' - statistic, p = func(*numdata.to_numpy().T) + statistic, p = func(*numdata.to_numpy().T, **kwargs) else: # Long-format assert group in data.columns assert dv in data.columns grp = data.groupby(group, observed=True)[dv] assert grp.ngroups > 1, 'Data must have at least two columns.' - statistic, p = func(*grp.apply(list)) + statistic, p = func(*grp.apply(list), **kwargs) elif isinstance(data, list): # Check that list contains other list or np.ndarray assert all(isinstance(el, (list, np.ndarray)) for el in data) assert len(data) > 1, 'Data must have at least two iterables.' - statistic, p = func(*data) + statistic, p = func(*data, **kwargs) else: # Data is a dict assert all(isinstance(el, (list, np.ndarray)) for el in data.values()) assert len(data) > 1, 'Data must have at least two iterables.' - statistic, p = func(*data.values()) + statistic, p = func(*data.values(), **kwargs) equal_var = True if p > alpha else False stat_name = 'W' if method.lower() == 'levene' else 'T' - - stats = pd.DataFrame({stat_name: statistic, 'pval': p, - 'equal_var': equal_var}, index=[method]) + stats = pd.DataFrame({stat_name: statistic, 'pval': p, 'equal_var': equal_var}, index=[method]) return _postprocess_dataframe(stats) @@ -463,7 +477,7 @@ def _check_multilevel_rm(data, func='epsilon'): # We end up with a one-way design. 
It is similar to applying # a paired T-test to gain scores instead of using repeated measures # on two time points. Here we have computed the gain scores. - data = data.groupby(level=1, axis=1).diff(axis=1).dropna(axis=1) + data = data.groupby(level=1, axis=1, observed=True).diff(axis=1).dropna(axis=1) data = data.droplevel(level=0, axis=1) else: # Both factors have more than 2 levels -- differ from R / JASP @@ -498,8 +512,8 @@ def _long_to_wide_rm(data, dv=None, within=None, subject=None): # Keep all relevant columns and reset index data = data[_fl([subject, within, dv])] # Convert to wide-format + collapse to the mean - data = pd.pivot_table(data, index=subject, values=dv, columns=within, - aggfunc='mean', dropna=True, observed=True) + data = pd.pivot_table( + data, index=subject, values=dv, columns=within, aggfunc='mean', dropna=True, observed=True) return data diff --git a/pingouin/pairwise.py b/pingouin/pairwise.py index 8b73c4f0..c759cdc3 100644 --- a/pingouin/pairwise.py +++ b/pingouin/pairwise.py @@ -173,8 +173,8 @@ def pairwise_ttests(data=None, dv=None, between=None, within=None, subject=None, >>> import pandas as pd >>> import pingouin as pg - >>> pd.set_option('expand_frame_repr', False) - >>> pd.set_option('max_columns', 20) + >>> pd.set_option('display.expand_frame_repr', False) + >>> pd.set_option('display.max_columns', 20) >>> df = pg.read_dataset('mixed_anova.csv') >>> pg.pairwise_ttests(dv='Scores', between='Group', data=df).round(3) Contrast A B Paired Parametric T dof alternative p-unc BF10 hedges @@ -426,11 +426,12 @@ def pairwise_ttests(data=None, dv=None, between=None, within=None, subject=None, else: tmp = data # Recursive call to pairwise_ttests - stats = stats.append(pairwise_ttests( + pt = pairwise_ttests( dv=dv, between=fbt[i], within=fwt[i], subject=subject, data=tmp, parametric=parametric, marginal=marginal, alpha=alpha, alternative=alternative, padjust=padjust, effsize=effsize, correction=correction, nan_policy=nan_policy, - return_desc=return_desc), ignore_index=True, sort=False) + return_desc=return_desc) + stats = pd.concat([stats, pt], axis=0, ignore_index=True, sort=False) # Then compute the interaction between the factors if interaction: @@ -608,12 +609,6 @@ def pairwise_tukey(data=None, dv=None, between=None, effsize='hedges'): :math:`Q(\\sqrt2|t_i|, r, N - r)` where :math:`r` is the total number of groups and :math:`N` is the total sample size. - .. warning:: Versions of Pingouin below 0.3.10 used a wrong algorithm for - the studentized range approximation [2]_, which resulted in (slightly) - incorrect p-values. Please make sure you're using the - LATEST VERSION of Pingouin, and always DOUBLE CHECK your results with - another statistical software. - References ---------- .. [1] Tukey, John W. "Comparing individual means in the analysis of @@ -635,7 +630,6 @@ def pairwise_tukey(data=None, dv=None, between=None, effsize='hedges'): 1 Adelie Gentoo 3700.662 5076.016 -1375.354 56.148 -24.495 0.000 -2.967 2 Chinstrap Gentoo 3733.088 5076.016 -1342.928 69.857 -19.224 0.000 -2.894 """ - # First compute the ANOVA # For max precision, make sure rounding is disabled old_options = options.copy() @@ -760,12 +754,6 @@ def pairwise_gameshowell(data=None, dv=None, between=None, effsize='hedges'): The p-values are then approximated using the Studentized range distribution :math:`Q(\\sqrt2|t_i|, r, v_i)`. - .. 
warning:: Versions of Pingouin below 0.3.10 used a wrong algorithm for - the studentized range approximation [2]_, which resulted in (slightly) - incorrect p-values. Please make sure you're using the - LATEST VERSION of Pingouin, and always DOUBLE CHECK your results with - another statistical software. - References ---------- .. [1] Games, Paul A., and John F. Howell. "Pairwise multiple comparison @@ -789,7 +777,6 @@ def pairwise_gameshowell(data=None, dv=None, between=None, effsize='hedges'): 1 Adelie Gentoo 3700.662 5076.016 -1375.354 58.811 -23.386 249.643 0.00 -2.833 2 Chinstrap Gentoo 3733.088 5076.016 -1342.928 65.103 -20.628 170.404 0.00 -3.105 """ - # Check the dataframe _check_dataframe(dv=dv, between=between, effects='between', data=data) @@ -956,8 +943,8 @@ def pairwise_corr(data, columns=None, covar=None, alternative='two-sided', >>> import pandas as pd >>> import pingouin as pg - >>> pd.set_option('expand_frame_repr', False) - >>> pd.set_option('max_columns', 20) + >>> pd.set_option('display.expand_frame_repr', False) + >>> pd.set_option('display.max_columns', 20) >>> data = pg.read_dataset('pairwise_corr').iloc[:, 1:] >>> pg.pairwise_corr(data, method='spearman', alternative='greater', padjust='bonf').round(3) X Y method alternative n r CI95% p-unc p-corr p-adjust power diff --git a/pingouin/parametric.py b/pingouin/parametric.py index 8a53960b..515c699f 100644 --- a/pingouin/parametric.py +++ b/pingouin/parametric.py @@ -10,8 +10,7 @@ __all__ = ["ttest", "rm_anova", "anova", "welch_anova", "mixed_anova", "ancova"] -def ttest(x, y, paired=False, alternative='two-sided', correction='auto', r=.707, - confidence=0.95): +def ttest(x, y, paired=False, alternative='two-sided', correction='auto', r=.707, confidence=0.95): """T-test. Parameters @@ -198,13 +197,12 @@ def ttest(x, y, paired=False, alternative='two-sided', correction='auto', r=.707 array([1.971859, 0.057056]) """ from scipy.stats import t, ttest_rel, ttest_ind, ttest_1samp - try: - from scipy.stats._stats_py import (_unequal_var_ttest_denom, - _equal_var_ttest_denom) - except ImportError: # Fallback for scipy<1.8.0 - from scipy.stats.stats import (_unequal_var_ttest_denom, - _equal_var_ttest_denom) - from pingouin import (power_ttest, power_ttest2n, compute_effsize) + try: # pragma: no cover + from scipy.stats._stats_py import _unequal_var_ttest_denom, _equal_var_ttest_denom + except ImportError: # pragma: no cover + # Fallback for scipy<1.8.0 + from scipy.stats.stats import _unequal_var_ttest_denom, _equal_var_ttest_denom + from pingouin import power_ttest, power_ttest2n, compute_effsize # Check arguments assert alternative in ['two-sided', 'greater', 'less'], ( @@ -215,8 +213,7 @@ def ttest(x, y, paired=False, alternative='two-sided', correction='auto', r=.707 y = np.asarray(y) if x.size != y.size and paired: - warnings.warn("x and y have unequal sizes. Switching to " - "paired == False. Check your data.") + warnings.warn("x and y have unequal sizes. Switching to paired == False. Check your data.") paired = False # Remove rows with missing values @@ -512,13 +509,6 @@ def rm_anova(data=None, dv=None, within=None, subject=None, correction='auto', # Check dataframe _check_dataframe(dv=dv, within=within, data=data, subject=subject, effects='within') - # Convert Categorical columns to string - # This is important otherwise all the groupby will return different results - # unless we specify .groupby(..., observed = True). 
- for c in [subject, within]: - if data[c].dtype.name == 'category': - data[c] = data[c].astype(str) - assert not data[within].isnull().any(), "Cannot have missing values in `within`." assert not data[subject].isnull().any(), "Cannot have missing values in `subject`." @@ -532,10 +522,12 @@ def rm_anova(data=None, dv=None, within=None, subject=None, correction='auto', data = data_piv.melt(ignore_index=False, value_name=dv).reset_index() # Groupby - grp_with = data.groupby(within)[dv] + # I think that observed=True is actually not needed here since we have already used + # `observed=True` in pivot_table. + grp_with = data.groupby(within, observed=True)[dv] rm = list(data[within].unique()) n_rm = len(rm) - n_obs = int(data.groupby(within)[dv].count().max()) + n_obs = int(grp_with.count().max()) grandmean = data[dv].mean() # Calculate sums of squares @@ -544,7 +536,7 @@ def rm_anova(data=None, dv=None, within=None, subject=None, correction='auto', # sstotal = sstime + ss_resall = sstime + (sssubj + sserror) # ss_total = ((data[dv] - grandmean)**2).sum() # We can further divide the residuals into a within and between component: - grp_subj = data.groupby(subject)[dv] + grp_subj = data.groupby(subject, observed=True)[dv] ss_resbetw = n_rm * np.sum((grp_subj.mean() - grandmean)**2) ss_reswith = ss_resall - ss_resbetw @@ -633,13 +625,6 @@ def rm_anova2(data=None, dv=None, within=None, subject=None, effsize="np2"): # Validate the dataframe _check_dataframe(dv=dv, within=within, data=data, subject=subject, effects='within') - # Convert Categorical columns to string - # This is important otherwise all the groupby will return different results - # unless we specify .groupby(..., observed = True). - for c in [subject, a, b]: - if data[c].dtype.name == 'category': - data[c] = data[c].astype(str) - assert not data[a].isnull().any(), "Cannot have missing values in %s" % a assert not data[b].isnull().any(), "Cannot have missing values in %s" % b assert not data[subject].isnull().any(), "Cannot have missing values in %s" % subject @@ -660,12 +645,14 @@ def rm_anova2(data=None, dv=None, within=None, subject=None, effsize="np2"): mu = data[dv].mean() # Groupby means - grp_s = data.groupby(subject)[dv].mean() - grp_a = data.groupby([a])[dv].mean() - grp_b = data.groupby([b])[dv].mean() - grp_ab = data.groupby([a, b])[dv].mean() - grp_as = data.groupby([a, subject])[dv].mean() - grp_bs = data.groupby([b, subject])[dv].mean() + # I think that observed=True is actually not needed here since we have already used + # `observed=True` in pivot_table. 
+ grp_s = data.groupby(subject, observed=True)[dv].mean() + grp_a = data.groupby([a], observed=True)[dv].mean() + grp_b = data.groupby([b], observed=True)[dv].mean() + grp_ab = data.groupby([a, b], observed=True)[dv].mean() + grp_as = data.groupby([a, subject], observed=True)[dv].mean() + grp_bs = data.groupby([b, subject], observed=True)[dv].mean() # Sums of squares ss_tot = np.sum((data[dv] - mu)**2) @@ -732,7 +719,6 @@ def rm_anova2(data=None, dv=None, within=None, subject=None, effsize="np2"): # Epsilon piv_a = data.pivot_table(index=subject, columns=a, values=dv, observed=True) piv_b = data.pivot_table(index=subject, columns=b, values=dv, observed=True) - # piv_ab = data.pivot_table(index=subject, columns=[a, b], values=dv) # Same as data_piv eps_a = epsilon(piv_a, correction='gg') eps_b = epsilon(piv_b, correction='gg') # Note that the GG epsilon of the interaction slightly differs between @@ -931,12 +917,10 @@ def anova(data=None, dv=None, between=None, ss_type=2, detailed=False, elif len(between) == 2: # Two factors with balanced design = Pingouin implementation # Two factors with unbalanced design = statsmodels - return anova2(dv=dv, between=between, data=data, ss_type=ss_type, - effsize=effsize) + return anova2(dv=dv, between=between, data=data, ss_type=ss_type, effsize=effsize) else: # 3 or more factors with (un)-balanced design = statsmodels - return anovan(dv=dv, between=between, data=data, ss_type=ss_type, - effsize=effsize) + return anovan(dv=dv, between=between, data=data, ss_type=ss_type, effsize=effsize) # Check data _check_dataframe(dv=dv, between=between, data=data, effects='between') @@ -1035,8 +1019,7 @@ def anova2(data=None, dv=None, between=None, ss_type=2, effsize='np2'): df_resid = data[dv].size - (ng1 * ng2) else: # UNBALANCED DESIGN - return anovan(dv=dv, between=between, data=data, ss_type=ss_type, - effsize=effsize) + return anovan(dv=dv, between=between, data=data, ss_type=ss_type, effsize=effsize) # Mean squares ms_fac1 = ss_fac1 / df_fac1 @@ -1069,8 +1052,7 @@ def anova2(data=None, dv=None, between=None, ss_type=2, effsize='np2'): all_effsize = [np2_fac1, np2_fac2, np2_inter, np.nan] # Create output dataframe - aov = pd.DataFrame({'Source': [fac1, fac2, fac1 + ' * ' + fac2, - 'Residual'], + aov = pd.DataFrame({'Source': [fac1, fac2, fac1 + ' * ' + fac2, 'Residual'], 'SS': [ss_fac1, ss_fac2, ss_inter, ss_resid], 'DF': [df_fac1, df_fac2, df_inter, df_resid], 'MS': [ms_fac1, ms_fac2, ms_inter, ms_resid], @@ -1133,8 +1115,7 @@ def anovan(data=None, dv=None, between=None, ss_type=2, effsize='np2'): aov = aov.iloc[1:, :] aov = aov.reset_index() - aov = aov.rename(columns={'index': 'Source', 'sum_sq': 'SS', - 'df': 'DF', 'PR(>F)': 'p-unc'}) + aov = aov.rename(columns={'index': 'Source', 'sum_sq': 'SS', 'df': 'DF', 'PR(>F)': 'p-unc'}) aov['MS'] = aov['SS'] / aov['DF'] # Effect size @@ -1144,8 +1125,7 @@ def anovan(data=None, dv=None, between=None, ss_type=2, effsize='np2'): all_n2[-1] = np.nan aov['n2'] = all_n2 else: - aov['np2'] = (aov['F'] * aov['DF']) / (aov['F'] * aov['DF'] + - aov.iloc[-1, 2]) + aov['np2'] = (aov['F'] * aov['DF']) / (aov['F'] * aov['DF'] + aov.iloc[-1, 2]) def format_source(x): for fac in between: @@ -1426,13 +1406,6 @@ def mixed_anova(data=None, dv=None, within=None, subject=None, between=None, _check_dataframe( dv=dv, within=within, between=between, data=data, subject=subject, effects='interaction') - # Convert Categorical columns to string - # This is important otherwise all the groupby will return different results - # unless we specify 
.groupby(..., observed = True). - for c in [within, between, subject]: - if data[c].dtype.name == 'category': - data[c] = data[c].astype(str) - # Pivot and melt the table. This has several effects: # 1) Force missing values to be explicit (a NaN cell is created) # 2) Automatic collapsing to the mean if multiple within factors are present @@ -1445,7 +1418,7 @@ def mixed_anova(data=None, dv=None, within=None, subject=None, between=None, # Check that subject IDs do not overlap between groups: the subject ID # should have a unique range / set of values for each between-subject # group e.g. group1= 1 --> 20 and group2 = 21 --> 40. - if not (data.groupby([subject, within])[between].nunique() == 1).all(): + if not (data.groupby([subject, within], observed=True)[between].nunique() == 1).all(): raise ValueError("Subject IDs cannot overlap between groups: each " "group in `%s` must have a unique set of " "subject IDs, e.g. group1 = [1, 2, 3, ..., 10] " @@ -1461,7 +1434,7 @@ def mixed_anova(data=None, dv=None, within=None, subject=None, between=None, ss_betw = aov_betw.at[0, 'SS'] ss_with = aov_with.at[0, 'SS'] # Extract residuals and interactions - grp = data.groupby([between, within])[dv] + grp = data.groupby([between, within], observed=True)[dv] # ssresall = residuals within + residuals between ss_resall = grp.apply(lambda x: (x - x.mean())**2).sum() # Interaction @@ -1470,10 +1443,10 @@ def mixed_anova(data=None, dv=None, within=None, subject=None, between=None, ss_resbetw = ss_total - (ss_with + ss_betw + ss_reswith + ss_inter) # DEGREES OF FREEDOM - n_obs = data.groupby(within)[dv].count().max() + n_obs = data.groupby(within, observed=True)[dv].count().max() df_with = aov_with.at[0, 'DF'] df_betw = aov_betw.at[0, 'DF'] - df_resbetw = n_obs - data.groupby(between)[dv].count().count() + df_resbetw = n_obs - data.groupby(between, observed=True)[dv].count().count() df_reswith = df_with * df_resbetw df_inter = aov_with.at[0, 'DF'] * aov_betw.at[0, 'DF'] @@ -1514,22 +1487,21 @@ def mixed_anova(data=None, dv=None, within=None, subject=None, between=None, ef_inter = ss_inter / (ss_inter + ss_reswith) # Stats table - aov = pd.concat([aov_betw.drop(1), aov_with.drop(1)], sort=False, ignore_index=True) + aov = pd.concat([aov_betw.drop(1), aov_with.drop(1)], axis=0, sort=False, ignore_index=True) # Update values aov.rename(columns={'DF': 'DF1'}, inplace=True) aov.at[0, 'F'], aov.at[1, 'F'] = f_betw, f_with aov.at[0, 'p-unc'], aov.at[1, 'p-unc'] = p_betw, p_with aov.at[0, effsize], aov.at[1, effsize] = ef_betw, ef_with - aov = aov.append({ + aov_inter = pd.DataFrame({ 'Source': 'Interaction', 'SS': ss_inter, 'DF1': df_inter, 'MS': ms_inter, 'F': f_inter, - 'p-unc': p_inter, effsize: ef_inter}, ignore_index=True) - + 'p-unc': p_inter, effsize: ef_inter}, index=[2]) + aov = pd.concat([aov, aov_inter], axis=0, sort=False, ignore_index=True) aov['DF2'] = [df_resbetw, df_reswith, df_reswith] aov['eps'] = [np.nan, aov_with.at[0, 'eps'], np.nan] col_order = [ 'Source', 'SS', 'DF1', 'DF2', 'MS', 'F', 'p-unc', 'p-GG-corr', effsize, 'eps', 'sphericity', 'W-spher', 'p-spher'] - aov = aov.reindex(columns=col_order) aov.dropna(how='all', axis=1, inplace=True) return _postprocess_dataframe(aov) diff --git a/pingouin/regression.py b/pingouin/regression.py index 437bf1ab..5276fd60 100644 --- a/pingouin/regression.py +++ b/pingouin/regression.py @@ -460,8 +460,8 @@ def linear_regression(X, y, add_intercept=True, weights=None, coef_only=False, # Relative importance if relimp: - data = pd.concat([pd.DataFrame(y, 
columns=['y']), - pd.DataFrame(X, columns=names)], sort=False, axis=1) + data = pd.concat( + [pd.DataFrame(y, columns=['y']), pd.DataFrame(X, columns=names)], sort=False, axis=1) if 'Intercept' in names: # Intercept is the first column reli = _relimp(data.drop(columns=['Intercept']).cov()) @@ -1254,7 +1254,7 @@ def mediation_analysis(data=None, x=None, m=None, y=None, covar=None, indirect['names'] = 'Indirect' else: indirect['names'] = indirect['names'].apply(lambda x: 'Indirect %s' % x) - stats = stats.append(indirect, ignore_index=True) + stats = pd.concat([stats, indirect], axis=0, ignore_index=True, sort=False) stats = stats.rename(columns={'names': 'path'}) # Restore options diff --git a/pingouin/tests/test_correlation.py b/pingouin/tests/test_correlation.py index a5302630..ee1c0bb8 100644 --- a/pingouin/tests/test_correlation.py +++ b/pingouin/tests/test_correlation.py @@ -112,9 +112,10 @@ def test_corr(self): stats = corr(df['Neuroticism'], df['Extraversion']) assert np.isclose(1 / float(stats['BF10'].to_numpy()), 1.478e-13) # Perfect correlation, CI and power should be 1, BF should be Inf + # https://github.com/raphaelvallat/pingouin/issues/195 stats = corr(x, x) - assert stats.at['pearson', 'r'] == 1 - assert stats.at['pearson', 'power'] == 1 + assert np.isclose(stats.at['pearson', 'r'], 1) + assert np.isclose(stats.at['pearson', 'power'], 1) # When one column is a constant, the correlation is not defined # and Pingouin return a DataFrame full of NaN, except for ``n`` x, y = [1, 1, 1], [1, 2, 3] diff --git a/pingouin/tests/test_distribution.py b/pingouin/tests/test_distribution.py index 883a57ad..9de5a678 100644 --- a/pingouin/tests/test_distribution.py +++ b/pingouin/tests/test_distribution.py @@ -62,20 +62,22 @@ def test_normality(self): normality(x, alpha=.05) normality(x.tolist(), method='normaltest', alpha=.05) # Pandas DataFrame - df_nan_piv = df_nan.pivot(index='Subject', columns='Time', - values='Scores') + df_nan_piv = df_nan.pivot(index='Subject', columns='Time', values='Scores') normality(df_nan_piv) # Wide-format dataframe normality(df_nan_piv['August']) # pandas Series # The line below is disabled because test fails on python 3.5 # assert stats_piv.equals(normality(df_nan, group='Time', dv='Scores')) normality(df_nan, group='Group', dv='Scores', method='normaltest') + normality(df_nan, group='Group', dv='Scores', method='jarque_bera') def test_homoscedasticity(self): """Test function test_homoscedasticity.""" hl = homoscedasticity(data=[x, y], alpha=.05) homoscedasticity(data=[x, y], method='bartlett', alpha=.05) hd = homoscedasticity(data={'x': x, 'y': y}, alpha=.05) + hd2 = homoscedasticity(data={'x': x, 'y': y}, alpha=.05, center="mean") assert hl.equals(hd) + assert not hd.equals(hd2) # Wide-format DataFrame homoscedasticity(df_pivot) # Long-format diff --git a/pingouin/tests/test_pairwise.py b/pingouin/tests/test_pairwise.py index 5972f095..f2c9fb9b 100644 --- a/pingouin/tests/test_pairwise.py +++ b/pingouin/tests/test_pairwise.py @@ -120,7 +120,7 @@ def test_pairwise_ttests(self): pt_med = pairwise_ttests(dv='Scores', within='Time', subject='Subject', padjust='holm', data=df[df['Group'] == 'Meditation']) - pt_merged = pt_con.append(pt_med) + pt_merged = pd.concat([pt_con, pt_med], ignore_index=True, sort=False) # T, dof and p-values should be equal assert np.array_equal(pt_merged['T'], pt['T'].iloc[4:]) assert np.array_equal(pt_merged['dof'], pt['dof'].iloc[4:]) diff --git a/pingouin/tests/test_parametric.py b/pingouin/tests/test_parametric.py index 
fcecbca7..ee33f155 100644 --- a/pingouin/tests/test_parametric.py +++ b/pingouin/tests/test_parametric.py @@ -18,8 +18,8 @@ df_cat[['Time', 'Group', 'Subject']] = df_cat[[ 'Time', 'Group', 'Subject']].astype('category') # Let's complicate things even more and add "ghost" Categories -df_cat['Time'].cat.add_categories('Casper', inplace=True) -df_cat['Group'].cat.add_categories('The Friendly Ghost', inplace=True) +df_cat['Time'] = df_cat['Time'].cat.add_categories('Casper') +df_cat['Group'] = df_cat['Group'].cat.add_categories('The Friendly Ghost') # Create random normal variables np.random.seed(1234) @@ -197,9 +197,8 @@ def test_anova(self): # Unbalanced and with missing values AND between as a categorical df_paincat = df_pain.copy() df_paincat['Hair color'] = df_paincat['Hair color'].astype('category') - df_paincat['Hair color'].cat.add_categories('Bald', inplace=True) - aov = df_paincat.anova(dv='Pain threshold', - between='Hair color').round(3) + df_paincat['Hair color'] = df_paincat['Hair color'].cat.add_categories('Bald') + aov = df_paincat.anova(dv='Pain threshold', between='Hair color').round(3) assert aov.at[0, 'ddof1'] == 3 assert aov.at[0, 'ddof2'] == 13 assert aov.at[0, 'F'] == 4.359 @@ -381,7 +380,7 @@ def test_rm_anova2(self): data_cat = data.copy() data_cat[['Subject', 'Time', 'Metric']] = \ data_cat[['Subject', 'Time', 'Metric']].astype('category') - data_cat['Time'].cat.add_categories('Casper', inplace=True) + data_cat['Time'] = data_cat['Time'].cat.add_categories('Casper') aov = rm_anova(data=data_cat, subject='Subject', within=['Time', 'Metric'], dv='Performance').round(3) array_equal(aov.loc[:, 'MS'], [828.817, 682.617, 112.217]) diff --git a/pingouin/tests/test_utils.py b/pingouin/tests/test_utils.py index bbf2e4a0..0137ffac 100644 --- a/pingouin/tests/test_utils.py +++ b/pingouin/tests/test_utils.py @@ -137,6 +137,12 @@ def test_remove_na(self): assert np.allclose(y_nan, [[6.], [3.], [2.]]) # When y is None remove_na(x, None, paired=False) + # When x or y is an empty list + # See https://github.com/raphaelvallat/pingouin/issues/222 + with pytest.raises(AssertionError): + remove_na(x=[], y=0) + with pytest.raises(AssertionError): + remove_na(x, y=[]) def test_check_eftype(self): """Test function _check_eftype.""" diff --git a/pingouin/utils.py b/pingouin/utils.py index b46e11dc..2563ecb4 100644 --- a/pingouin/utils.py +++ b/pingouin/utils.py @@ -225,6 +225,7 @@ def remove_na(x, y=None, paired=False, axis='rows'): return _remove_na_single(x, axis=axis), y else: # y is list, np.array, pd.Series y = np.asarray(y) + assert y.size != 0, "y cannot be an empty list or array." 
# Make sure that we just pass-through if y have only 1 element if y.size == 1: return _remove_na_single(x, axis=axis), y diff --git a/requirements.txt b/requirements.txt index 51c57f3d..5a32a1a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,8 +2,8 @@ numpy>=1.19 scipy>=1.7 pandas>=1.0 matplotlib>=3.0.2 -seaborn>=0.9.0 -statsmodels>=0.12.0 +seaborn>=0.11 +statsmodels>=0.13 scikit-learn pandas_flavor>=0.2.0 outdated diff --git a/setup.py b/setup.py index 41b0fc27..273c49a2 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ def read(fname): MAINTAINER_EMAIL = 'raphaelvallat9@gmail.com' URL = 'https://pingouin-stats.org/index.html' DOWNLOAD_URL = 'https://github.com/raphaelvallat/pingouin/' -VERSION = '0.5.0' +VERSION = '0.5.1' LICENSE = 'GPL-3.0' PACKAGE_DATA = {'pingouin.data.icons': ['*.svg']} @@ -24,8 +24,8 @@ def read(fname): 'scipy>=1.7', 'pandas>=1.0', 'matplotlib>=3.0.2', - 'seaborn>=0.9.0', - 'statsmodels>=0.12.0', + 'seaborn>=0.11', + 'statsmodels>=0.13', 'scikit-learn', 'pandas_flavor>=0.2.0', 'outdated',
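A minimal usage sketch of the user-facing changes bundled in this release (Jarque-Bera option in ``pg.normality``, keyword pass-through in ``pg.homoscedasticity``, preserved categorical dtypes in ``rm_anova``/``mixed_anova``, and the explicit error on an empty ``y``). The random data, seed, and column selections below are illustrative assumptions; only the function calls themselves come from the patch above::

    import numpy as np
    import pingouin as pg

    rng = np.random.default_rng(42)
    x = rng.normal(loc=0, scale=1.0, size=100)
    y = rng.normal(loc=0, scale=1.5, size=100)

    # New in this release: Jarque-Bera normality test (issue 216).
    print(pg.normality(x, method="jarque_bera"))

    # New in this release: extra keyword arguments are forwarded to
    # scipy.stats.levene (issue 218), e.g. centering on the mean instead
    # of the median.
    print(pg.homoscedasticity([x, y], method="levene", center="mean"))

    # Bugfix (issue 224): rm_anova / mixed_anova no longer convert the
    # categorical columns of the input dataframe in-place.
    df = pg.read_dataset("mixed_anova.csv")
    df[["Time", "Group", "Subject"]] = df[["Time", "Group", "Subject"]].astype("category")
    pg.mixed_anova(data=df, dv="Scores", within="Time", subject="Subject", between="Group")
    assert df["Time"].dtype.name == "category"  # dtypes are preserved

    # Bugfix (issue 222): an empty y now raises an explicit error.
    try:
        pg.ttest(x, [])
    except AssertionError as err:
        print(err)  # "y cannot be an empty list or array."

Note also that, with pandas 1.4, the display options used in the docstrings are spelled ``display.expand_frame_repr`` and ``display.max_columns``, as updated in the diff above.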