diff --git a/CHANGELOG.md b/CHANGELOG.md
index b44fdb079..b832e3559 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,14 @@
 ## Changelog
 
+#### 0.25.8 - 2021-01-22
+
+Important: we dropped Patsy as our formula framework, and adopted Formulaic. While the latter is less mature than Patsy, we feel the core capabilities are satisfactory and it provides new opportunities.
+
+##### New features
+ - Parametric models with formulas are able to be serialized now.
+ - a `_scipy_fit_callback` function is available to use in fitting algorithms.
+
+
 #### 0.25.7 - 2020-12-09
 
 ##### API Changes
diff --git a/docs/Changelog.rst b/docs/Changelog.rst
index d6e5568d7..e99722660 100644
--- a/docs/Changelog.rst
+++ b/docs/Changelog.rst
@@ -1,6 +1,22 @@
 Changelog
 =========
 
+0.25.8 - 2021-01-22
+-------------------
+
+Important: we dropped Patsy as our formula framework, and adopted
+Formulaic. While the latter is less mature than Patsy, we feel the core
+capabilities are satisfactory and it provides new opportunities.
+
+New features
+~~~~~~~~~~~~
+
+-  Parametric models with formulas are able to be serialized now.
+-  a ``_scipy_fit_callback`` function is available to use in fitting
+   algorithms.
+
+.. _section-1:
+
 0.25.7 - 2020-12-09
 -------------------
 
@@ -17,11 +33,13 @@ Bug fixes
 -  Fixed ``concordance_index_`` when no events observed
 -  Fixed label being overwritten in ParametricUnivariate models
 
-.. _section-1:
+.. _section-2:
 
 0.25.6 - 2020-10-26
 -------------------
 
+.. _new-features-1:
+
 New features
 ~~~~~~~~~~~~
 
@@ -40,7 +58,7 @@ Bug fixes
 -  Fix bug in ``KaplanMeierFitter``\ ’s interval censoring where
    max(lower bound) < min(upper bound).
 
-.. _section-2:
+.. _section-3:
 
 0.25.5 - 2020-09-23
 -------------------
@@ -65,12 +83,12 @@ Bug fixes
    parametric models
 -  ``weights`` wasn’t being applied properly in NPMLE
 
-.. _section-3:
+.. _section-4:
 
 0.25.4 - 2020-08-26
 -------------------
 
-.. _new-features-1:
+.. _new-features-2:
 
 New features
 ~~~~~~~~~~~~
@@ -87,12 +105,12 @@ Bug fixes
 
 -  fix ``check_assumptions`` when using formulas.
 
-.. _section-4:
+.. _section-5:
 
 0.25.3 - 2020-08-24
 -------------------
 
-.. _new-features-2:
+.. _new-features-3:
 
 New features
 ~~~~~~~~~~~~
@@ -118,12 +136,12 @@ Bug fixes
 -  fix Python error when calling ``plot_covariate_groups``
 -  fix dtype mismatches in ``plot_partial_effects_on_outcome``.
 
-.. _section-5:
+.. _section-6:
 
 0.25.2 - 2020-08-08
 -------------------
 
-.. _new-features-3:
+.. _new-features-4:
 
 New features
 ~~~~~~~~~~~~
@@ -152,7 +170,7 @@ Bug fixes
 -  fix some exception imports I missed.
 -  fix log-likelihood p-value in splines ``CoxPHFitter``
 
-.. _section-6:
+.. _section-7:
 
 0.25.1 - 2020-08-01
 -------------------
@@ -168,12 +186,12 @@ Bug fixes
 -  put ``patsy`` as a proper dependency.
 -  suppress some Pandas 1.1 warnings.
 
-.. _section-7:
+.. _section-8:
 
 0.25.0 - 2020-07-27
 -------------------
 
-.. _new-features-4:
+.. _new-features-5:
 
 New features
 ~~~~~~~~~~~~
@@ -246,12 +264,12 @@ Bug fixes
 -  fixed NaN bug in ``survival_table_from_events`` with intervals when
    no events would occur in a interval.
 
-.. _section-8:
+.. _section-9:
 
 0.24.16 - 2020-07-09
 --------------------
 
-.. _new-features-5:
+.. _new-features-6:
 
 New features
 ~~~~~~~~~~~~
@@ -266,7 +284,7 @@ Bug fixes
 
 -  fixed ``utils.median_survival_time`` not accepting Pandas Series.
 
-.. _section-9:
+.. _section-10:
 
 0.24.15 - 2020-07-07
 --------------------
@@ -282,7 +300,7 @@ Bug fixes
 -  fixed bug where using ``conditional_after`` and ``times`` in
    ``CoxPHFitter("spline")`` prediction methods would be ignored.
 
-.. _section-10:
+.. _section-11:
 
 0.24.14 - 2020-07-02
 --------------------
@@ -299,7 +317,7 @@ Bug fixes
 -  fixed a bug where some columns would not be displayed in
    ``print_summary``
 
-.. _section-11:
+.. _section-12:
 
 0.24.13 - 2020-06-22
 --------------------
@@ -314,24 +332,24 @@ Bug fixes
 -  fixed a bug where ``CoxPHFitter`` would fail with working with
    ``sklearn_adapter``
 
-.. _section-12:
+.. _section-13:
 
 0.24.12 - 2020-06-20
 --------------------
 
-.. _new-features-6:
+.. _new-features-7:
 
 New features
 ~~~~~~~~~~~~
 
 -  improved convergence of ``GeneralizedGamma(Regression)Fitter``.
 
-.. _section-13:
+.. _section-14:
 
 0.24.11 - 2020-06-17
 --------------------
 
-.. _new-features-7:
+.. _new-features-8:
 
 New features
 ~~~~~~~~~~~~
@@ -354,12 +372,12 @@ API Changes
    penalized by ``penalizer`` - we now penalizing everything except
    intercept terms in linear relationships.
 
-.. _section-14:
+.. _section-15:
 
 0.24.10 - 2020-06-16
 --------------------
 
-.. _new-features-8:
+.. _new-features-9:
 
 New features
 ~~~~~~~~~~~~
@@ -384,12 +402,12 @@ Bug fixes
 -  fixed a bug in initialization of some interval-censoring models ->
    better convergence.
 
-.. _section-15:
+.. _section-16:
 
 0.24.9 - 2020-06-05
 -------------------
 
-.. _new-features-9:
+.. _new-features-10:
 
 New features
 ~~~~~~~~~~~~
@@ -407,12 +425,12 @@ Bug fixes
 -  Cleared up some mislabeling in ``plot_loglogs``. Thanks @sean-reed!
 -  tuples are now able to be used as input in univariate models.
 
-.. _section-16:
+.. _section-17:
 
 0.24.8 - 2020-05-17
 -------------------
 
-.. _new-features-10:
+.. _new-features-11:
 
 New features
 ~~~~~~~~~~~~
@@ -421,12 +439,12 @@ New features
    Not all edge cases are fully checked, and some features are missing.
    Try it under ``KaplanMeierFitter.fit_interval_censoring``
 
-.. _section-17:
+.. _section-18:
 
 0.24.7 - 2020-05-17
 -------------------
 
-.. _new-features-11:
+.. _new-features-12:
 
 New features
 ~~~~~~~~~~~~
@@ -442,12 +460,12 @@ New features
 -  some convergence tweaks which should help recent performance
    regressions.
 
-.. _section-18:
+.. _section-19:
 
 0.24.6 - 2020-05-05
 -------------------
 
-.. _new-features-12:
+.. _new-features-13:
 
 New features
 ~~~~~~~~~~~~
@@ -465,12 +483,12 @@ Bug fixes
 -  fixed bug where ``cdf_plot`` and ``qq_plot`` were not factoring in
    the weights correctly.
 
-.. _section-19:
+.. _section-20:
 
 0.24.5 - 2020-05-01
 -------------------
 
-.. _new-features-13:
+.. _new-features-14:
 
 New features
 ~~~~~~~~~~~~
@@ -487,7 +505,7 @@ Bug fixes
 -  Improved ``at_risk_counts`` for subplots.
 -  More data validation checks for ``CoxTimeVaryingFitter``
 
-.. _section-20:
+.. _section-21:
 
 0.24.4 - 2020-04-13
 -------------------
@@ -501,12 +519,12 @@ Bug fixes
 -  setting a dataframe in ``ancillary_df`` works for interval censoring
 -  ``.score`` works for interval censored models
 
-.. _section-21:
+.. _section-22:
 
 0.24.3 - 2020-03-25
 -------------------
 
-.. _new-features-14:
+.. _new-features-15:
 
 New features
 ~~~~~~~~~~~~
@@ -524,7 +542,7 @@ Bug fixes
 -  Fixed error in HTML printer that was hiding concordance index
    information.
 
-.. _section-22:
+.. _section-23:
 
 0.24.2 - 2020-03-15
 -------------------
@@ -541,12 +559,12 @@ Bug fixes
 -  Fixed a keyword bug in ``plot_covariate_groups`` for parametric
    models.
 
-.. _section-23:
+.. _section-24:
 
 0.24.1 - 2020-03-05
 -------------------
 
-.. _new-features-15:
+.. _new-features-16:
 
 New features
 ~~~~~~~~~~~~
@@ -561,7 +579,7 @@ Bug fixes
 
 -  Fixed bug with plotting hazards in NelsonAalenFitter.
 
-.. _section-24:
+.. _section-25:
 
 0.24.0 - 2020-02-20
 -------------------
@@ -570,7 +588,7 @@ This version and future versions of lifelines no longer support py35.
 Pandas 1.0 is fully supported, along with previous versions. Minimum
 Scipy has been bumped to 1.2.0.
 
-.. _new-features-16:
+.. _new-features-17:
 
 New features
 ~~~~~~~~~~~~
@@ -635,7 +653,7 @@ Bug fixes
 -  Cox models now incorporate any penalizers in their
    ``log_likelihood_``
 
-.. _section-25:
+.. _section-26:
 
 0.23.9 - 2020-01-28
 -------------------
@@ -651,7 +669,7 @@ Bug fixes
    of ``GeneralizedGammaRegressionFitter`` and any custom regression
    models should update their code as soon as possible.
 
-.. _section-26:
+.. _section-27:
 
 0.23.8 - 2020-01-21
 -------------------
@@ -667,19 +685,19 @@ Bug fixes
    ``GeneralizedGammaRegressionFitter`` and any custom regression models
    should update their code as soon as possible.
 
-.. _section-27:
+.. _section-28:
 
 0.23.7 - 2020-01-14
 -------------------
 
 Bug fixes for py3.5.
 
-.. _section-28:
+.. _section-29:
 
 0.23.6 - 2020-01-07
 -------------------
 
-.. _new-features-17:
+.. _new-features-18:
 
 New features
 ~~~~~~~~~~~~
@@ -693,12 +711,12 @@ New features
 -  custom parametric regression models can now do left and interval
    censoring.
 
-.. _section-29:
+.. _section-30:
 
 0.23.5 - 2020-01-05
 -------------------
 
-.. _new-features-18:
+.. _new-features-19:
 
 New features
 ~~~~~~~~~~~~
@@ -717,19 +735,19 @@ Bug fixes
 -  fixed bug where large exponential numbers in ``print_summary`` were
    not being suppressed correctly.
 
-.. _section-30:
+.. _section-31:
 
 0.23.4 - 2019-12-15
 -------------------
 
 -  Bug fix for PyPI
 
-.. _section-31:
+.. _section-32:
 
 0.23.3 - 2019-12-11
 -------------------
 
-.. _new-features-19:
+.. _new-features-20:
 
 New features
 ~~~~~~~~~~~~
@@ -744,12 +762,12 @@ Bug fixes
 -  fix import in ``printer.py``
 -  fix html printing with Univariate models.
 
-.. _section-32:
+.. _section-33:
 
 0.23.2 - 2019-12-07
 -------------------
 
-.. _new-features-20:
+.. _new-features-21:
 
 New features
 ~~~~~~~~~~~~
@@ -770,12 +788,12 @@ Bug fixes
 -  fixed repr for ``sklearn_adapter`` classes.
 -  fixed ``conditional_after`` in Cox model with strata was used.
 
-.. _section-33:
+.. _section-34:
 
 0.23.1 - 2019-11-27
 -------------------
 
-.. _new-features-21:
+.. _new-features-22:
 
 New features
 ~~~~~~~~~~~~
@@ -797,12 +815,12 @@ Bug fixes
 -  fixed bug when using ``print_summary`` with left censored models.
 -  lots of minor bug fixes.
 
-.. _section-34:
+.. _section-35:
 
 0.23.0 - 2019-11-17
 -------------------
 
-.. _new-features-22:
+.. _new-features-23:
 
 New features
 ~~~~~~~~~~~~
@@ -833,7 +851,7 @@ API Changes
 -  ``left_censorship`` in ``fit`` has been removed in favour of
    ``fit_left_censoring``.
 
-.. _section-35:
+.. _section-36:
 
 0.22.10 - 2019-11-08
 --------------------
@@ -851,7 +869,7 @@ Bug fixes
 -  fixed bug in plot_covariate_groups for AFT models when >1d arrays
    were used for values arg.
 
-.. _section-36:
+.. _section-37:
 
 0.22.9 - 2019-10-30
 -------------------
@@ -868,12 +886,12 @@ Bug fixes
 -  ``CoxPHFitter`` now displays correct columns values when changing
    alpha param.
 
-.. _section-37:
+.. _section-38:
 
 0.22.8 - 2019-10-06
 -------------------
 
-.. _new-features-23:
+.. _new-features-24:
 
 New features
 ~~~~~~~~~~~~
@@ -890,12 +908,12 @@ Bug fixes
 
 -  fixed initial_point being ignored in AFT models.
 
-.. _section-38:
+.. _section-39:
 
 0.22.7 - 2019-09-29
 -------------------
 
-.. _new-features-24:
+.. _new-features-25:
 
 New features
 ~~~~~~~~~~~~
@@ -922,12 +940,12 @@ API Changes
 -  Some previous ``StatisticalWarnings`` have been replaced by
    ``ApproximationWarning``
 
-.. _section-39:
+.. _section-40:
 
 0.22.6 - 2019-09-25
 -------------------
 
-.. _new-features-25:
+.. _new-features-26:
 
 New features
 ~~~~~~~~~~~~
@@ -950,12 +968,12 @@ API Changes
 -  ``utils.dataframe_interpolate_at_times`` renamed to
    ``utils.interpolate_at_times_and_return_pandas``.
 
-.. _section-40:
+.. _section-41:
 
 0.22.5 - 2019-09-20
 -------------------
 
-.. _new-features-26:
+.. _new-features-27:
 
 New features
 ~~~~~~~~~~~~
@@ -981,12 +999,12 @@ API Changes
 -  ``_get_initial_value`` in parametric univariate models is renamed
    ``_create_initial_point``
 
-.. _section-41:
+.. _section-42:
 
 0.22.4 - 2019-09-04
 -------------------
 
-.. _new-features-27:
+.. _new-features-28:
 
 New features
 ~~~~~~~~~~~~
@@ -1013,12 +1031,12 @@ Bug fixes
 -  Fixed issue where ``concordance_index`` would never exit if NaNs in
    dataset.
 
-.. _section-42:
+.. _section-43:
 
 0.22.3 - 2019-08-08
 -------------------
 
-.. _new-features-28:
+.. _new-features-29:
 
 New features
 ~~~~~~~~~~~~
@@ -1054,12 +1072,12 @@ Bug fixes
 -  Fixed an error in the ``predict_percentile`` of
    ``LogLogisticAFTFitter``. New tests have been added around this.
 
-.. _section-43:
+.. _section-44:
 
 0.22.2 - 2019-07-25
 -------------------
 
-.. _new-features-29:
+.. _new-features-30:
 
 New features
 ~~~~~~~~~~~~
@@ -1077,12 +1095,12 @@ Bug fixes
    errors when using the library. The correctly numpy has been pinned
    (to 1.14.0+)
 
-.. _section-44:
+.. _section-45:
 
 0.22.1 - 2019-07-14
 -------------------
 
-.. _new-features-30:
+.. _new-features-31:
 
 New features
 ~~~~~~~~~~~~
@@ -1120,12 +1138,12 @@ Bug fixes
 -  fixed an overflow bug in ``KaplanMeierFitter`` confidence intervals
 -  improvements in data validation for ``CoxTimeVaryingFitter``
 
-.. _section-45:
+.. _section-46:
 
 0.22.0 - 2019-07-03
 -------------------
 
-.. _new-features-31:
+.. _new-features-32:
 
 New features
 ~~~~~~~~~~~~
@@ -1168,14 +1186,14 @@ Bug fixes
    is now exact instead of an approximation.
 -  fixed a name error bug in ``CoxTimeVaryingFitter.plot``
 
-.. _section-46:
+.. _section-47:
 
 0.21.5 - 2019-06-22
 -------------------
 
 I’m skipping 0.21.4 version because of deployment issues.
 
-.. _new-features-32:
+.. _new-features-33:
 
 New features
 ~~~~~~~~~~~~
@@ -1192,12 +1210,12 @@ Bug fixes
 -  fixed visual bug that misaligned x-axis ticks and at-risk counts.
    Thanks @christopherahern!
 
-.. _section-47:
+.. _section-48:
 
 0.21.3 - 2019-06-04
 -------------------
 
-.. _new-features-33:
+.. _new-features-34:
 
 New features
 ~~~~~~~~~~~~
@@ -1218,12 +1236,12 @@ Bug fixes
 
 -  ``covariates_from_event_matrix`` handle nulls better
 
-.. _section-48:
+.. _section-49:
 
 0.21.2 - 2019-05-16
 -------------------
 
-.. _new-features-34:
+.. _new-features-35:
 
 New features
 ~~~~~~~~~~~~
@@ -1252,12 +1270,12 @@ API changes
 Bug fixes
 ~~~~~~~~~
 
-.. _section-49:
+.. _section-50:
 
 0.21.1 - 2019-04-26
 -------------------
 
-.. _new-features-35:
+.. _new-features-36:
 
 New features
 ~~~~~~~~~~~~
@@ -1281,12 +1299,12 @@ Bug fixes
 
 -  fixed bug in CoxTimeVaryingFitter when ax is provided, thanks @j-i-l!
 
-.. _section-50:
+.. _section-51:
 
 0.21.0 - 2019-04-12
 -------------------
 
-.. _new-features-36:
+.. _new-features-37:
 
 New features
 ~~~~~~~~~~~~
@@ -1321,12 +1339,12 @@ Bug fixes
 -  Fixed an error that didn’t let users use Numpy arrays in prediction
    for AFT models
 
-.. _section-51:
+.. _section-52:
 
 0.20.5 - 2019-04-08
 -------------------
 
-.. _new-features-37:
+.. _new-features-38:
 
 New features
 ~~~~~~~~~~~~
@@ -1352,12 +1370,12 @@ Bug fixes
    test when using strata.
 -  Fixed some plotting bugs with ``AalenJohansenFitter``
 
-.. _section-52:
+.. _section-53:
 
 0.20.4 - 2019-03-27
 -------------------
 
-.. _new-features-38:
+.. _new-features-39:
 
 New features
 ~~~~~~~~~~~~
@@ -1385,12 +1403,12 @@ Bug fixes
 -  ``PiecewiseExponentialFitter`` is available with
    ``from lifelines import *``.
 
-.. _section-53:
+.. _section-54:
 
 0.20.3 - 2019-03-23
 -------------------
 
-.. _new-features-39:
+.. _new-features-40:
 
 New features
 ~~~~~~~~~~~~
@@ -1403,12 +1421,12 @@ New features
    ``plot_survival_function`` and
    ``confidence_interval_survival_function_``.
 
-.. _section-54:
+.. _section-55:
 
 0.20.2 - 2019-03-21
 -------------------
 
-.. _new-features-40:
+.. _new-features-41:
 
 New features
 ~~~~~~~~~~~~
@@ -1448,7 +1466,7 @@ Bug fixes
    the q parameter was below the truncation limit. This should have been
    ``-np.inf``
 
-.. _section-55:
+.. _section-56:
 
 0.20.1 - 2019-03-16
 -------------------
@@ -1472,7 +1490,7 @@ API changes
    This is no longer the case. A 0 will still be added if there is a
    duration (observed or not) at 0 occurs however.
 
-.. _section-56:
+.. _section-57:
 
 0.20.0 - 2019-03-05
 -------------------
@@ -1481,7 +1499,7 @@ API changes
    recent installs where Py3.
 -  Updated minimum dependencies, specifically Matplotlib and Pandas.
 
-.. _new-features-41:
+.. _new-features-42:
 
 New features
 ~~~~~~~~~~~~
@@ -1508,12 +1526,12 @@ Bug fixes
 
 -  Fixed a bug with plotting and ``check_assumptions``.
 
-.. _section-57:
+.. _section-58:
 
 0.19.5 - 2019-02-26
 -------------------
 
-.. _new-features-42:
+.. _new-features-43:
 
 New features
 ~~~~~~~~~~~~
@@ -1523,7 +1541,7 @@ New features
    features or categorical variables.
 -  Convergence improvements for AFT models.
 
-.. _section-58:
+.. _section-59:
 
 0.19.4 - 2019-02-25
 -------------------
@@ -1535,12 +1553,12 @@ Bug fixes
 
 -  remove some bad print statements in ``CoxPHFitter``.
 
-.. _section-59:
+.. _section-60:
 
 0.19.3 - 2019-02-25
 -------------------
 
-.. _new-features-43:
+.. _new-features-44:
 
 New features
 ~~~~~~~~~~~~
@@ -1552,12 +1570,12 @@ New features
 -  Performance increase to ``print_summary`` in the ``CoxPHFitter`` and
    ``CoxTimeVaryingFitter`` model.
 
-.. _section-60:
+.. _section-61:
 
 0.19.2 - 2019-02-22
 -------------------
 
-.. _new-features-44:
+.. _new-features-45:
 
 New features
 ~~~~~~~~~~~~
@@ -1575,12 +1593,12 @@ Bug fixes
 -  Univariate fitters are more flexiable and can allow 2-d and
    DataFrames as inputs.
 
-.. _section-61:
+.. _section-62:
 
 0.19.1 - 2019-02-21
 -------------------
 
-.. _new-features-45:
+.. _new-features-46:
 
 New features
 ~~~~~~~~~~~~
@@ -1597,12 +1615,12 @@ API changes
    ``PiecewiseExponential`` to the same as ``ExponentialFitter`` (from
    ``\lambda * t`` to ``t / \lambda``).
 
-.. _section-62:
+.. _section-63:
 
 0.19.0 - 2019-02-20
 -------------------
 
-.. _new-features-46:
+.. _new-features-47:
 
 New features
 ~~~~~~~~~~~~
@@ -1658,7 +1676,7 @@ Bug Fixes
    models. Thanks @airanmehr!
 -  Fixed some Pandas <0.24 bugs.
 
-.. _section-63:
+.. _section-64:
 
 0.18.6 - 2019-02-13
 -------------------
@@ -1668,7 +1686,7 @@ Bug Fixes
    ``rank`` and ``km`` p-values now.
 -  some performance improvements to ``qth_survival_time``.
 
-.. _section-64:
+.. _section-65:
 
 0.18.5 - 2019-02-11
 -------------------
@@ -1689,7 +1707,7 @@ Bug Fixes
    that can be used to turn off variance calculations since this can
    take a long time for large datasets. Thanks @pzivich!
 
-.. _section-65:
+.. _section-66:
 
 0.18.4 - 2019-02-10
 -------------------
@@ -1699,7 +1717,7 @@ Bug Fixes
 -  adding left-truncation support to parametric univarite models with
    the ``entry`` kwarg in ``.fit``
 
-.. _section-66:
+.. _section-67:
 
 0.18.3 - 2019-02-07
 -------------------
@@ -1709,7 +1727,7 @@ Bug Fixes
    warnings are more noticeable.
 -  Improved some warning and error messages.
 
-.. _section-67:
+.. _section-68:
 
 0.18.2 - 2019-02-05
 -------------------
@@ -1725,7 +1743,7 @@ Bug Fixes
    Moved them all (most) to use ``autograd``.
 -  ``LogNormalFitter`` no longer models ``log_sigma``.
 
-.. _section-68:
+.. _section-69:
 
 0.18.1 - 2019-02-02
 -------------------
@@ -1736,7 +1754,7 @@ Bug Fixes
 -  use the ``autograd`` lib to help with gradients.
 -  New ``LogLogisticFitter`` univariate fitter available.
 
-.. _section-69:
+.. _section-70:
 
 0.18.0 - 2019-01-31
 -------------------
@@ -1773,7 +1791,7 @@ Bug Fixes
    ``LinAlgError: Matrix is singular.`` and report back to the user
    advice.
 
-.. _section-70:
+.. _section-71:
 
 0.17.5 - 2019-01-25
 -------------------
@@ -1781,7 +1799,7 @@ Bug Fixes
 -  more bugs in ``plot_covariate_groups`` fixed when using non-numeric
    strata.
 
-.. _section-71:
+.. _section-72:
 
 0.17.4 -2019-01-25
 ------------------
@@ -1793,7 +1811,7 @@ Bug Fixes
 -  ``groups`` is now called ``values`` in
    ``CoxPHFitter.plot_covariate_groups``
 
-.. _section-72:
+.. _section-73:
 
 0.17.3 - 2019-01-24
 -------------------
@@ -1801,7 +1819,7 @@ Bug Fixes
 -  Fix in ``compute_residuals`` when using ``schoenfeld`` and the
    minumum duration has only censored subjects.
 
-.. _section-73:
+.. _section-74:
 
 0.17.2 2019-01-22
 -----------------
@@ -1812,7 +1830,7 @@ Bug Fixes
    ``for`` loop. The downside is the code is more esoteric now. I’ve
    added comments as necessary though 🤞
 
-.. _section-74:
+.. _section-75:
 
 0.17.1 - 2019-01-20
 -------------------
@@ -1829,7 +1847,7 @@ Bug Fixes
 -  Fixes a Pandas performance warning in ``CoxTimeVaryingFitter``.
 -  Performances improvements to ``CoxTimeVaryingFitter``.
 
-.. _section-75:
+.. _section-76:
 
 0.17.0 - 2019-01-11
 -------------------
@@ -1850,7 +1868,7 @@ Bug Fixes
 
 -  some plotting improvemnts to ``plotting.plot_lifetimes``
 
-.. _section-76:
+.. _section-77:
 
 0.16.3 - 2019-01-03
 -------------------
@@ -1858,7 +1876,7 @@ Bug Fixes
 -  More ``CoxPHFitter`` performance improvements. Up to a 40% reduction
    vs 0.16.2 for some datasets.
 
-.. _section-77:
+.. _section-78:
 
 0.16.2 - 2019-01-02
 -------------------
@@ -1869,14 +1887,14 @@ Bug Fixes
    has lots of duplicate times. See
    https://github.com/CamDavidsonPilon/lifelines/issues/591
 
-.. _section-78:
+.. _section-79:
 
 0.16.1 - 2019-01-01
 -------------------
 
 -  Fixed py2 division error in ``concordance`` method.
 
-.. _section-79:
+.. _section-80:
 
 0.16.0 - 2019-01-01
 -------------------
@@ -1912,7 +1930,7 @@ Bug Fixes
    ``lifelines.utils.to_episodic_format``.
 -  ``CoxTimeVaryingFitter`` now accepts ``strata``.
 
-.. _section-80:
+.. _section-81:
 
 0.15.4
 ------
@@ -1920,14 +1938,14 @@ Bug Fixes
 -  bug fix for the Cox model likelihood ratio test when using
    non-trivial weights.
 
-.. _section-81:
+.. _section-82:
 
 0.15.3 - 2018-12-18
 -------------------
 
 -  Only allow matplotlib less than 3.0.
 
-.. _section-82:
+.. _section-83:
 
 0.15.2 - 2018-11-23
 -------------------
@@ -1938,7 +1956,7 @@ Bug Fixes
 -  removed ``entry`` from ``ExponentialFitter`` and ``WeibullFitter`` as
    it was doing nothing.
 
-.. _section-83:
+.. _section-84:
 
 0.15.1 - 2018-11-23
 -------------------
@@ -1947,7 +1965,7 @@ Bug Fixes
 -  Raise NotImplementedError if the ``robust`` flag is used in
    ``CoxTimeVaryingFitter`` - that’s not ready yet.
 
-.. _section-84:
+.. _section-85:
 
 0.15.0 - 2018-11-22
 -------------------
@@ -2018,7 +2036,7 @@ Bug Fixes
    When Estimating Risks in Pharmacoepidemiology” for a nice overview of
    the model.
 
-.. _section-85:
+.. _section-86:
 
 0.14.6 - 2018-07-02
 -------------------
@@ -2026,7 +2044,7 @@ Bug Fixes
 -  fix for n > 2 groups in ``multivariate_logrank_test`` (again).
 -  fix bug for when ``event_observed`` column was not boolean.
 
-.. _section-86:
+.. _section-87:
 
 0.14.5 - 2018-06-29
 -------------------
@@ -2034,7 +2052,7 @@ Bug Fixes
 -  fix for n > 2 groups in ``multivariate_logrank_test``
 -  fix weights in KaplanMeierFitter when using a pandas Series.
 
-.. _section-87:
+.. _section-88:
 
 0.14.4 - 2018-06-14
 -------------------
@@ -2051,7 +2069,7 @@ Bug Fixes
 -  New ``delay`` parameter in ``add_covariate_to_timeline``
 -  removed ``two_sided_z_test`` from ``statistics``
 
-.. _section-88:
+.. _section-89:
 
 0.14.3 - 2018-05-24
 -------------------
@@ -2063,7 +2081,7 @@ Bug Fixes
 -  adds a ``column`` argument to ``CoxTimeVaryingFitter`` and
    ``CoxPHFitter`` ``plot`` method to plot only a subset of columns.
 
-.. _section-89:
+.. _section-90:
 
 0.14.2 - 2018-05-18
 -------------------
@@ -2071,7 +2089,7 @@ Bug Fixes
 -  some quality of life improvements for working with
    ``CoxTimeVaryingFitter`` including new ``predict_`` methods.
 
-.. _section-90:
+.. _section-91:
 
 0.14.1 - 2018-04-01
 -------------------
@@ -2089,7 +2107,7 @@ Bug Fixes
    faster completion of ``fit`` for large dataframes, and up to 10%
    faster for small dataframes.
 
-.. _section-91:
+.. _section-92:
 
 0.14.0 - 2018-03-03
 -------------------
@@ -2111,7 +2129,7 @@ Bug Fixes
    of a ``RuntimeWarning``
 -  New checks for complete separation in the dataset for regressions.
 
-.. _section-92:
+.. _section-93:
 
 0.13.0 - 2017-12-22
 -------------------
@@ -2140,7 +2158,7 @@ Bug Fixes
    group the same subjects together and give that observation a weight
    equal to the count. Altogether, this means a much faster regression.
 
-.. _section-93:
+.. _section-94:
 
 0.12.0
 ------
@@ -2157,7 +2175,7 @@ Bug Fixes
 -  Additional functionality to ``utils.survival_table_from_events`` to
    bin the index to make the resulting table more readable.
 
-.. _section-94:
+.. _section-95:
 
 0.11.3
 ------
@@ -2169,7 +2187,7 @@ Bug Fixes
    observation or censorship.
 -  More accurate prediction methods parametrics univariate models.
 
-.. _section-95:
+.. _section-96:
 
 0.11.2
 ------
@@ -2177,14 +2195,14 @@ Bug Fixes
 -  Changing liscense to valilla MIT.
 -  Speed up ``NelsonAalenFitter.fit`` considerably.
 
-.. _section-96:
+.. _section-97:
 
 0.11.1 - 2017-06-22
 -------------------
 
 -  Python3 fix for ``CoxPHFitter.plot``.
 
-.. _section-97:
+.. _section-98:
 
 0.11.0 - 2017-06-21
 -------------------
@@ -2198,14 +2216,14 @@ Bug Fixes
    of a new ``loc`` kwarg. This is to align with Pandas deprecating
    ``ix``
 
-.. _section-98:
+.. _section-99:
 
 0.10.1 - 2017-06-05
 -------------------
 
 -  fix in internal normalization for ``CoxPHFitter`` predict methods.
 
-.. _section-99:
+.. _section-100:
 
 0.10.0
 ------
@@ -2220,7 +2238,7 @@ Bug Fixes
    mimic R’s ``basehaz`` API.
 -  new ``predict_log_partial_hazards`` to ``CoxPHFitter``
 
-.. _section-100:
+.. _section-101:
 
 0.9.4
 -----
@@ -2243,7 +2261,7 @@ Bug Fixes
 -  performance improvements in ``CoxPHFitter`` - should see at least a
    10% speed improvement in ``fit``.
 
-.. _section-101:
+.. _section-102:
 
 0.9.2
 -----
@@ -2252,7 +2270,7 @@ Bug Fixes
 -  throw an error if no admissable pairs in the c-index calculation.
    Previously a NaN was returned.
 
-.. _section-102:
+.. _section-103:
 
 0.9.1
 -----
@@ -2260,7 +2278,7 @@ Bug Fixes
 -  add two summary functions to Weibull and Exponential fitter, solves
    #224
 
-.. _section-103:
+.. _section-104:
 
 0.9.0
 -----
@@ -2276,7 +2294,7 @@ Bug Fixes
 -  Default predict method in ``k_fold_cross_validation`` is now
    ``predict_expectation``
 
-.. _section-104:
+.. _section-105:
 
 0.8.1 - 2015-08-01
 ------------------
@@ -2293,7 +2311,7 @@ Bug Fixes
    -  scaling of smooth hazards in NelsonAalenFitter was off by a factor
       of 0.5.
 
-.. _section-105:
+.. _section-106:
 
 0.8.0
 -----
@@ -2312,7 +2330,7 @@ Bug Fixes
    ``lifelines.statistics. power_under_cph``.
 -  fixed a bug when using KaplanMeierFitter for left-censored data.
 
-.. _section-106:
+.. _section-107:
 
 0.7.1
 -----
@@ -2331,7 +2349,7 @@ Bug Fixes
 -  refactor each fitter into it’s own submodule. For now, the tests are
    still in the same file. This will also *not* break the API.
 
-.. _section-107:
+.. _section-108:
 
 0.7.0 - 2015-03-01
 ------------------
@@ -2350,7 +2368,7 @@ Bug Fixes
    duration remaining until the death event, given survival up until
    time t.
 
-.. _section-108:
+.. _section-109:
 
 0.6.1
 -----
@@ -2362,7 +2380,7 @@ Bug Fixes
    your work is to sum up the survival function (for expected values or
    something similar), it’s more difficult to make a mistake.
 
-.. _section-109:
+.. _section-110:
 
 0.6.0 - 2015-02-04
 ------------------
@@ -2385,7 +2403,7 @@ Bug Fixes
 -  In ``KaplanMeierFitter``, ``epsilon`` has been renamed to
    ``precision``.
 
-.. _section-110:
+.. _section-111:
 
 0.5.1 - 2014-12-24
 ------------------
@@ -2406,7 +2424,7 @@ Bug Fixes
    ``lifelines.plotting.add_at_risk_counts``.
 -  Fix bug Epanechnikov kernel.
 
-.. _section-111:
+.. _section-112:
 
 0.5.0 - 2014-12-07
 ------------------
@@ -2419,7 +2437,7 @@ Bug Fixes
 -  add test for summary()
 -  Alternate metrics can be used for ``k_fold_cross_validation``.
 
-.. _section-112:
+.. _section-113:
 
 0.4.4 - 2014-11-27
 ------------------
@@ -2431,7 +2449,7 @@ Bug Fixes
 -  Fixes bug in 1-d input not returning in CoxPHFitter
 -  Lots of new tests.
 
-.. _section-113:
+.. _section-114:
 
 0.4.3 - 2014-07-23
 ------------------
@@ -2452,7 +2470,7 @@ Bug Fixes
 -  Adds option ``include_likelihood`` to CoxPHFitter fit method to save
    the final log-likelihood value.
 
-.. _section-114:
+.. _section-115:
 
 0.4.2 - 2014-06-19
 ------------------
@@ -2472,7 +2490,7 @@ Bug Fixes
    from failing so often (this a stop-gap)
 -  pep8 everything
 
-.. _section-115:
+.. _section-116:
 
 0.4.1.1
 -------
@@ -2485,7 +2503,7 @@ Bug Fixes
 -  Adding more robust cross validation scheme based on issue #67.
 -  fixing ``regression_dataset`` in ``datasets``.
 
-.. _section-116:
+.. _section-117:
 
 0.4.1 - 2014-06-11
 ------------------
@@ -2504,7 +2522,7 @@ Bug Fixes
 -  Adding a Changelog.
 -  more sanitizing for the statistical tests =)
 
-.. _section-117:
+.. _section-118:
 
 0.4.0 - 2014-06-08
 ------------------
diff --git a/docs/Survival Regression.rst b/docs/Survival Regression.rst
index e921f5701..d6b84e9b2 100644
--- a/docs/Survival Regression.rst	
+++ b/docs/Survival Regression.rst	
@@ -999,32 +999,6 @@ located under :class:`~lifelines.fitters.aalen_additive_fitter.AalenAdditiveFitt
     +-----------+--------+----------+--------------+-----------------+---------------------+---------------------------------------------------------+-------------+-------------+----------+--------+--------+
 
 
-I'm using the lovely library `Patsy <https://github.com/pydata/patsy>`__ here to create a
-design matrix from my original DataFrame.
-
-.. code:: python
-
-    import patsy
-    X = patsy.dmatrix('un_continent_name + regime + start_year', data, return_type='dataframe')
-    X = X.rename(columns={'Intercept': 'baseline'})
-
-    print(X.columns.tolist())
-
-
-.. parsed-literal::
-
-  ['baseline',
-   'un_continent_name[T.Americas]',
-   'un_continent_name[T.Asia]',
-   'un_continent_name[T.Europe]',
-   'un_continent_name[T.Oceania]',
-   'regime[T.Military Dict]',
-   'regime[T.Mixed Dem]',
-   'regime[T.Monarchy]',
-   'regime[T.Parliamentary Dem]',
-   'regime[T.Presidential Dem]',
-   'start_year']
-
 
 We have also included the ``coef_penalizer`` option. During the estimation, a
 linear regression is computed at each step. Often the regression can be
@@ -1047,7 +1021,7 @@ two individual columns: a *duration* column and a boolean *event occurred* colum
 
 .. code:: python
 
-    aaf.fit(X, 'T', event_col='E')
+    aaf.fit(X, 'T', event_col='E', formula='un_continent_name + regime + start_year')
 
 
 After fitting, the instance exposes a :attr:`~lifelines.fitters.aalen_additive_fitter.AalenAdditiveFitter.cumulative_hazards_` DataFrame
diff --git a/lifelines/fitters/__init__.py b/lifelines/fitters/__init__.py
index 49f0c107c..cefe2c891 100644
--- a/lifelines/fitters/__init__.py
+++ b/lifelines/fitters/__init__.py
@@ -288,6 +288,7 @@ class ParametricUnivariateFitter(UnivariateFitter):
     _MIN_PARAMETER_VALUE = 1e-9
     _scipy_fit_method = "L-BFGS-B"
     _scipy_fit_options: Dict[str, Any] = dict()
+    _scipy_fit_callback = None
     _fitted_parameter_names: List[str]
 
     def __init__(self, *args, **kwargs):
@@ -547,6 +548,7 @@ def _fit_model(self, Ts, E, entry, weights, show_progress=True):
                     args=(Ts, E, entry, weights),
                     bounds=self._bounds,
                     options=option,
+                    callback=self._scipy_fit_callback,
                 )
                 previous_results = results
 
@@ -1258,9 +1260,10 @@ def _compute_central_values_of_raw_training_data(self, df, strata=None, name="ba
 
         else:
             from distutils.version import LooseVersion
-            if LooseVersion(pd.__version__) >= '1.1.0':
+
+            if LooseVersion(pd.__version__) >= "1.1.0":
                 # silence deprecation warning
-                describe_kwarg = {'datetime_is_numeric': True}
+                describe_kwarg = {"datetime_is_numeric": True}
             else:
                 describe_kwarg = {}
             described = df.describe(include="all", **describe_kwarg)
@@ -1317,6 +1320,7 @@ class ParametricRegressionFitter(RegressionFitter):
 
     _scipy_fit_method = "BFGS"
     _scipy_fit_options: Dict[str, Any] = dict()
+    _scipy_fit_callback = None
     fit_intercept = False
     force_no_intercept = False
     regressors = None
@@ -1803,8 +1807,8 @@ def _fit(
         # https://github.com/CamDavidsonPilon/lifelines/issues/931
         assert list(self.regressors.keys()) == list(self._norm_std.index.get_level_values(0).unique())
         _params = np.concatenate([_params[k] for k in self.regressors.keys()])
-        self.params_ = _params / self._norm_std
 
+        self.params_ = _params / self._norm_std
         self.variance_matrix_ = pd.DataFrame(self._compute_variance_matrix(), index=_index, columns=_index)
         self.standard_errors_ = self._compute_standard_errors(Ts, E.values, weights.values, entries.values, Xs)
         self.confidence_intervals_ = self._compute_confidence_intervals()
@@ -1902,6 +1906,7 @@ def _fit_model(self, likelihood, Ts, Xs, E, weights, entries, show_progress=Fals
                 jac=True,
                 args=(Ts, E, weights, entries, utils.DataframeSlicer(Xs)),
                 options={**{"disp": show_progress}, **self._scipy_fit_options},
+                callback=self._scipy_fit_callback,
             )
 
             if results.fun < minimum_ll:
@@ -2704,7 +2709,7 @@ def fit(
             diagnostics. Useful if convergence is failing.
 
         formula: string
-            Use an R-style formula for modeling the dataset. See formula syntax: https://patsy.readthedocs.io/en/latest/quickstart.html
+            Use an R-style formula for modeling the dataset. See formula syntax: https://matthewwardrop.github.io/formulaic/basic/grammar/
 
         ancillary: None, boolean, str, or DataFrame, optional (default=None)
             Choose to model the ancillary parameters.
@@ -2859,7 +2864,7 @@ def fit_interval_censoring(
             observation. If left as None, will be inferred from the start and stop columns (lower_bound==upper_bound means uncensored)
 
         formula: string
-            Use an R-style formula for modeling the dataset. See formula syntax: https://patsy.readthedocs.io/en/latest/quickstart.html
+            Use an R-style formula for modeling the dataset. See formula syntax: https://matthewwardrop.github.io/formulaic/basic/grammar/
 
         ancillary: None, boolean, str, or DataFrame, optional (default=None)
             Choose to model the ancillary parameters.
@@ -3036,7 +3041,7 @@ def fit_left_censoring(
             observation. If left as None, assume all individuals are uncensored.
 
         formula: string
-            Use an R-style formula for modeling the dataset. See formula syntax: https://patsy.readthedocs.io/en/latest/quickstart.html
+            Use an R-style formula for modeling the dataset. See formula syntax: https://matthewwardrop.github.io/formulaic/basic/grammar/
 
         ancillary: None, boolean, str, or DataFrame, optional (default=None)
             Choose to model the ancillary parameters.
diff --git a/lifelines/tests/test_estimation.py b/lifelines/tests/test_estimation.py
index 205e0ceee..695424a72 100644
--- a/lifelines/tests/test_estimation.py
+++ b/lifelines/tests/test_estimation.py
@@ -1911,7 +1911,6 @@ def test_all_models_have_regressors_property(self, rossi, regression_models):
             fitter.fit(rossi, "week", "arrest")
             assert hasattr(fitter, "regressors")
 
-    @pytest.mark.xfail
     def test_pickle_serialization(self, rossi, regression_models):
         for fitter in regression_models:
             fitter.fit(rossi, "week", "arrest")
@@ -1920,7 +1919,6 @@ def test_pickle_serialization(self, rossi, regression_models):
             dif = (fitter.durations - unpickled.durations).sum()
             assert dif == 0
 
-    @pytest.mark.xfail
     def test_dill_serialization(self, rossi, regression_models):
         from dill import dumps, loads
 
@@ -1931,7 +1929,6 @@ def test_dill_serialization(self, rossi, regression_models):
             dif = (fitter.durations - unpickled.durations).sum()
             assert dif == 0
 
-    @pytest.mark.xfail
     def test_joblib_serialization(self, rossi, regression_models):
         from joblib import dump, load
 
diff --git a/lifelines/tests/utils/test_utils.py b/lifelines/tests/utils/test_utils.py
index f7116ff78..a68d61838 100644
--- a/lifelines/tests/utils/test_utils.py
+++ b/lifelines/tests/utils/test_utils.py
@@ -924,7 +924,6 @@ def test_we_can_user_other_prediction_methods(self, X, Y):
         wf.fit(X, Y)
         assert wf.predict(X).shape[0] == X.shape[0]
 
-    @pytest.mark.xfail
     def test_dill(self, X, Y):
         import dill
 
@@ -936,7 +935,6 @@ def test_dill(self, X, Y):
         s = dill.loads(s)
         assert cph.predict(X).shape[0] == X.shape[0]
 
-    @pytest.mark.xfail
     def test_pickle(self, X, Y):
         import pickle
 
@@ -971,7 +969,6 @@ def test_sklearn_GridSearchCV_accept_model_with_parallelization(self, X, Y):
         assert clf.best_params_ == {"l1_ratio": 0.5, "model_ancillary": False, "penalizer": 0.01}
         assert clf.predict(X).shape[0] == X.shape[0]
 
-    @pytest.mark.xfail
     def test_joblib(self, X, Y):
         from joblib import dump, load
 
diff --git a/lifelines/utils/__init__.py b/lifelines/utils/__init__.py
index 97acaafd2..d8b4d582b 100644
--- a/lifelines/utils/__init__.py
+++ b/lifelines/utils/__init__.py
@@ -1894,12 +1894,14 @@ def add_intercept_col(cls, df):
 
     def transform_df(self, df: pd.DataFrame):
 
-        import patsy
+        import formulaic
 
         Xs = {}
         for param_name, transform in self.mappings.items():
-            if isinstance(transform, patsy.design_info.DesignInfo):
-                (X,) = patsy.build_design_matrices([transform], df, return_type="dataframe")
+            if isinstance(transform, formulaic.formula.Formula):
+                index = df.index
+                X = transform.get_model_matrix(df)
+                X.index = index
             elif isinstance(transform, list):
                 if self.force_intercept:
                     df = self.add_intercept_col(df)
@@ -1941,11 +1943,11 @@ def _list_seed_transform(self, list_: List):
 
     def _string_seed_transform(self, formula: str, df: pd.DataFrame):
         # user input a formula, hopefully
-        import patsy
+        import formulaic
 
         if self.force_intercept:
             formula += "+ 1"
 
-        _X = patsy.dmatrix(formula, df, 1, NA_action="raise")
+        design_info = formulaic.Formula(formula)
 
-        return _X.design_info
+        return design_info
diff --git a/lifelines/version.py b/lifelines/version.py
index bd5e9b1bf..54acb8a2d 100644
--- a/lifelines/version.py
+++ b/lifelines/version.py
@@ -1,4 +1,4 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
-__version__ = "0.25.7"
+__version__ = "0.25.8"
diff --git a/reqs/base-requirements.txt b/reqs/base-requirements.txt
index bd185bc1b..40bc5c80e 100644
--- a/reqs/base-requirements.txt
+++ b/reqs/base-requirements.txt
@@ -4,4 +4,4 @@ pandas>=0.23.0
 matplotlib>=3.0
 autograd>=1.3
 autograd-gamma>=0.3
-patsy>=0.5.0
+formulaic