diff --git a/CHANGELOG.md b/CHANGELOG.md index b44fdb079..b832e3559 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ ## Changelog +#### 0.25.8 - 2021-01-22 + +Important: we dropped Patsy as our formula framework, and adopted Formulaic. While the latter is less mature than Patsy, we feel the core capabilities are satisfactory and it provides new opportunities. + +##### New features + - Parametric models with formulas are able to be serialized now. + - a `_scipy_fit_callback` function is available to use in fitting algorithms. + + #### 0.25.7 - 2020-12-09 ##### API Changes diff --git a/docs/Changelog.rst b/docs/Changelog.rst index d6e5568d7..e99722660 100644 --- a/docs/Changelog.rst +++ b/docs/Changelog.rst @@ -1,6 +1,22 @@ Changelog ========= +0.25.8 - 2021-01-22 +------------------- + +Important: we dropped Patsy as our formula framework, and adopted +Formulaic. While the latter is less mature than Patsy, we feel the core +capabilities are satisfactory and it provides new opportunities. + +New features +~~~~~~~~~~~~ + +- Parametric models with formulas are able to be serialized now. +- a ``_scipy_fit_callback`` function is available to use in fitting + algorithms. + +.. _section-1: + 0.25.7 - 2020-12-09 ------------------- @@ -17,11 +33,13 @@ Bug fixes - Fixed ``concordance_index_`` when no events observed - Fixed label being overwritten in ParametricUnivariate models -.. _section-1: +.. _section-2: 0.25.6 - 2020-10-26 ------------------- +.. _new-features-1: + New features ~~~~~~~~~~~~ @@ -40,7 +58,7 @@ Bug fixes - Fix bug in ``KaplanMeierFitter``\ ’s interval censoring where max(lower bound) < min(upper bound). -.. _section-2: +.. _section-3: 0.25.5 - 2020-09-23 ------------------- @@ -65,12 +83,12 @@ Bug fixes parametric models - ``weights`` wasn’t being applied properly in NPMLE -.. _section-3: +.. _section-4: 0.25.4 - 2020-08-26 ------------------- -.. _new-features-1: +.. 
_new-features-2: New features ~~~~~~~~~~~~ @@ -87,12 +105,12 @@ Bug fixes - fix ``check_assumptions`` when using formulas. -.. _section-4: +.. _section-5: 0.25.3 - 2020-08-24 ------------------- -.. _new-features-2: +.. _new-features-3: New features ~~~~~~~~~~~~ @@ -118,12 +136,12 @@ Bug fixes - fix Python error when calling ``plot_covariate_groups`` - fix dtype mismatches in ``plot_partial_effects_on_outcome``. -.. _section-5: +.. _section-6: 0.25.2 - 2020-08-08 ------------------- -.. _new-features-3: +.. _new-features-4: New features ~~~~~~~~~~~~ @@ -152,7 +170,7 @@ Bug fixes - fix some exception imports I missed. - fix log-likelihood p-value in splines ``CoxPHFitter`` -.. _section-6: +.. _section-7: 0.25.1 - 2020-08-01 ------------------- @@ -168,12 +186,12 @@ Bug fixes - put ``patsy`` as a proper dependency. - suppress some Pandas 1.1 warnings. -.. _section-7: +.. _section-8: 0.25.0 - 2020-07-27 ------------------- -.. _new-features-4: +.. _new-features-5: New features ~~~~~~~~~~~~ @@ -246,12 +264,12 @@ Bug fixes - fixed NaN bug in ``survival_table_from_events`` with intervals when no events would occur in a interval. -.. _section-8: +.. _section-9: 0.24.16 - 2020-07-09 -------------------- -.. _new-features-5: +.. _new-features-6: New features ~~~~~~~~~~~~ @@ -266,7 +284,7 @@ Bug fixes - fixed ``utils.median_survival_time`` not accepting Pandas Series. -.. _section-9: +.. _section-10: 0.24.15 - 2020-07-07 -------------------- @@ -282,7 +300,7 @@ Bug fixes - fixed bug where using ``conditional_after`` and ``times`` in ``CoxPHFitter("spline")`` prediction methods would be ignored. -.. _section-10: +.. _section-11: 0.24.14 - 2020-07-02 -------------------- @@ -299,7 +317,7 @@ Bug fixes - fixed a bug where some columns would not be displayed in ``print_summary`` -.. _section-11: +.. _section-12: 0.24.13 - 2020-06-22 -------------------- @@ -314,24 +332,24 @@ Bug fixes - fixed a bug where ``CoxPHFitter`` would fail with working with ``sklearn_adapter`` -.. 
_section-12: +.. _section-13: 0.24.12 - 2020-06-20 -------------------- -.. _new-features-6: +.. _new-features-7: New features ~~~~~~~~~~~~ - improved convergence of ``GeneralizedGamma(Regression)Fitter``. -.. _section-13: +.. _section-14: 0.24.11 - 2020-06-17 -------------------- -.. _new-features-7: +.. _new-features-8: New features ~~~~~~~~~~~~ @@ -354,12 +372,12 @@ API Changes penalized by ``penalizer`` - we now penalizing everything except intercept terms in linear relationships. -.. _section-14: +.. _section-15: 0.24.10 - 2020-06-16 -------------------- -.. _new-features-8: +.. _new-features-9: New features ~~~~~~~~~~~~ @@ -384,12 +402,12 @@ Bug fixes - fixed a bug in initialization of some interval-censoring models -> better convergence. -.. _section-15: +.. _section-16: 0.24.9 - 2020-06-05 ------------------- -.. _new-features-9: +.. _new-features-10: New features ~~~~~~~~~~~~ @@ -407,12 +425,12 @@ Bug fixes - Cleared up some mislabeling in ``plot_loglogs``. Thanks @sean-reed! - tuples are now able to be used as input in univariate models. -.. _section-16: +.. _section-17: 0.24.8 - 2020-05-17 ------------------- -.. _new-features-10: +.. _new-features-11: New features ~~~~~~~~~~~~ @@ -421,12 +439,12 @@ New features Not all edge cases are fully checked, and some features are missing. Try it under ``KaplanMeierFitter.fit_interval_censoring`` -.. _section-17: +.. _section-18: 0.24.7 - 2020-05-17 ------------------- -.. _new-features-11: +.. _new-features-12: New features ~~~~~~~~~~~~ @@ -442,12 +460,12 @@ New features - some convergence tweaks which should help recent performance regressions. -.. _section-18: +.. _section-19: 0.24.6 - 2020-05-05 ------------------- -.. _new-features-12: +.. _new-features-13: New features ~~~~~~~~~~~~ @@ -465,12 +483,12 @@ Bug fixes - fixed bug where ``cdf_plot`` and ``qq_plot`` were not factoring in the weights correctly. -.. _section-19: +.. _section-20: 0.24.5 - 2020-05-01 ------------------- -.. _new-features-13: +.. 
_new-features-14: New features ~~~~~~~~~~~~ @@ -487,7 +505,7 @@ Bug fixes - Improved ``at_risk_counts`` for subplots. - More data validation checks for ``CoxTimeVaryingFitter`` -.. _section-20: +.. _section-21: 0.24.4 - 2020-04-13 ------------------- @@ -501,12 +519,12 @@ Bug fixes - setting a dataframe in ``ancillary_df`` works for interval censoring - ``.score`` works for interval censored models -.. _section-21: +.. _section-22: 0.24.3 - 2020-03-25 ------------------- -.. _new-features-14: +.. _new-features-15: New features ~~~~~~~~~~~~ @@ -524,7 +542,7 @@ Bug fixes - Fixed error in HTML printer that was hiding concordance index information. -.. _section-22: +.. _section-23: 0.24.2 - 2020-03-15 ------------------- @@ -541,12 +559,12 @@ Bug fixes - Fixed a keyword bug in ``plot_covariate_groups`` for parametric models. -.. _section-23: +.. _section-24: 0.24.1 - 2020-03-05 ------------------- -.. _new-features-15: +.. _new-features-16: New features ~~~~~~~~~~~~ @@ -561,7 +579,7 @@ Bug fixes - Fixed bug with plotting hazards in NelsonAalenFitter. -.. _section-24: +.. _section-25: 0.24.0 - 2020-02-20 ------------------- @@ -570,7 +588,7 @@ This version and future versions of lifelines no longer support py35. Pandas 1.0 is fully supported, along with previous versions. Minimum Scipy has been bumped to 1.2.0. -.. _new-features-16: +.. _new-features-17: New features ~~~~~~~~~~~~ @@ -635,7 +653,7 @@ Bug fixes - Cox models now incorporate any penalizers in their ``log_likelihood_`` -.. _section-25: +.. _section-26: 0.23.9 - 2020-01-28 ------------------- @@ -651,7 +669,7 @@ Bug fixes of ``GeneralizedGammaRegressionFitter`` and any custom regression models should update their code as soon as possible. -.. _section-26: +.. _section-27: 0.23.8 - 2020-01-21 ------------------- @@ -667,19 +685,19 @@ Bug fixes ``GeneralizedGammaRegressionFitter`` and any custom regression models should update their code as soon as possible. -.. _section-27: +.. 
_section-28: 0.23.7 - 2020-01-14 ------------------- Bug fixes for py3.5. -.. _section-28: +.. _section-29: 0.23.6 - 2020-01-07 ------------------- -.. _new-features-17: +.. _new-features-18: New features ~~~~~~~~~~~~ @@ -693,12 +711,12 @@ New features - custom parametric regression models can now do left and interval censoring. -.. _section-29: +.. _section-30: 0.23.5 - 2020-01-05 ------------------- -.. _new-features-18: +.. _new-features-19: New features ~~~~~~~~~~~~ @@ -717,19 +735,19 @@ Bug fixes - fixed bug where large exponential numbers in ``print_summary`` were not being suppressed correctly. -.. _section-30: +.. _section-31: 0.23.4 - 2019-12-15 ------------------- - Bug fix for PyPI -.. _section-31: +.. _section-32: 0.23.3 - 2019-12-11 ------------------- -.. _new-features-19: +.. _new-features-20: New features ~~~~~~~~~~~~ @@ -744,12 +762,12 @@ Bug fixes - fix import in ``printer.py`` - fix html printing with Univariate models. -.. _section-32: +.. _section-33: 0.23.2 - 2019-12-07 ------------------- -.. _new-features-20: +.. _new-features-21: New features ~~~~~~~~~~~~ @@ -770,12 +788,12 @@ Bug fixes - fixed repr for ``sklearn_adapter`` classes. - fixed ``conditional_after`` in Cox model with strata was used. -.. _section-33: +.. _section-34: 0.23.1 - 2019-11-27 ------------------- -.. _new-features-21: +.. _new-features-22: New features ~~~~~~~~~~~~ @@ -797,12 +815,12 @@ Bug fixes - fixed bug when using ``print_summary`` with left censored models. - lots of minor bug fixes. -.. _section-34: +.. _section-35: 0.23.0 - 2019-11-17 ------------------- -.. _new-features-22: +.. _new-features-23: New features ~~~~~~~~~~~~ @@ -833,7 +851,7 @@ API Changes - ``left_censorship`` in ``fit`` has been removed in favour of ``fit_left_censoring``. -.. _section-35: +.. _section-36: 0.22.10 - 2019-11-08 -------------------- @@ -851,7 +869,7 @@ Bug fixes - fixed bug in plot_covariate_groups for AFT models when >1d arrays were used for values arg. -.. _section-36: +.. 
_section-37: 0.22.9 - 2019-10-30 ------------------- @@ -868,12 +886,12 @@ Bug fixes - ``CoxPHFitter`` now displays correct columns values when changing alpha param. -.. _section-37: +.. _section-38: 0.22.8 - 2019-10-06 ------------------- -.. _new-features-23: +.. _new-features-24: New features ~~~~~~~~~~~~ @@ -890,12 +908,12 @@ Bug fixes - fixed initial_point being ignored in AFT models. -.. _section-38: +.. _section-39: 0.22.7 - 2019-09-29 ------------------- -.. _new-features-24: +.. _new-features-25: New features ~~~~~~~~~~~~ @@ -922,12 +940,12 @@ API Changes - Some previous ``StatisticalWarnings`` have been replaced by ``ApproximationWarning`` -.. _section-39: +.. _section-40: 0.22.6 - 2019-09-25 ------------------- -.. _new-features-25: +.. _new-features-26: New features ~~~~~~~~~~~~ @@ -950,12 +968,12 @@ API Changes - ``utils.dataframe_interpolate_at_times`` renamed to ``utils.interpolate_at_times_and_return_pandas``. -.. _section-40: +.. _section-41: 0.22.5 - 2019-09-20 ------------------- -.. _new-features-26: +.. _new-features-27: New features ~~~~~~~~~~~~ @@ -981,12 +999,12 @@ API Changes - ``_get_initial_value`` in parametric univariate models is renamed ``_create_initial_point`` -.. _section-41: +.. _section-42: 0.22.4 - 2019-09-04 ------------------- -.. _new-features-27: +.. _new-features-28: New features ~~~~~~~~~~~~ @@ -1013,12 +1031,12 @@ Bug fixes - Fixed issue where ``concordance_index`` would never exit if NaNs in dataset. -.. _section-42: +.. _section-43: 0.22.3 - 2019-08-08 ------------------- -.. _new-features-28: +.. _new-features-29: New features ~~~~~~~~~~~~ @@ -1054,12 +1072,12 @@ Bug fixes - Fixed an error in the ``predict_percentile`` of ``LogLogisticAFTFitter``. New tests have been added around this. -.. _section-43: +.. _section-44: 0.22.2 - 2019-07-25 ------------------- -.. _new-features-29: +.. _new-features-30: New features ~~~~~~~~~~~~ @@ -1077,12 +1095,12 @@ Bug fixes errors when using the library. 
The correctly numpy has been pinned (to 1.14.0+) -.. _section-44: +.. _section-45: 0.22.1 - 2019-07-14 ------------------- -.. _new-features-30: +.. _new-features-31: New features ~~~~~~~~~~~~ @@ -1120,12 +1138,12 @@ Bug fixes - fixed an overflow bug in ``KaplanMeierFitter`` confidence intervals - improvements in data validation for ``CoxTimeVaryingFitter`` -.. _section-45: +.. _section-46: 0.22.0 - 2019-07-03 ------------------- -.. _new-features-31: +.. _new-features-32: New features ~~~~~~~~~~~~ @@ -1168,14 +1186,14 @@ Bug fixes is now exact instead of an approximation. - fixed a name error bug in ``CoxTimeVaryingFitter.plot`` -.. _section-46: +.. _section-47: 0.21.5 - 2019-06-22 ------------------- I’m skipping 0.21.4 version because of deployment issues. -.. _new-features-32: +.. _new-features-33: New features ~~~~~~~~~~~~ @@ -1192,12 +1210,12 @@ Bug fixes - fixed visual bug that misaligned x-axis ticks and at-risk counts. Thanks @christopherahern! -.. _section-47: +.. _section-48: 0.21.3 - 2019-06-04 ------------------- -.. _new-features-33: +.. _new-features-34: New features ~~~~~~~~~~~~ @@ -1218,12 +1236,12 @@ Bug fixes - ``covariates_from_event_matrix`` handle nulls better -.. _section-48: +.. _section-49: 0.21.2 - 2019-05-16 ------------------- -.. _new-features-34: +.. _new-features-35: New features ~~~~~~~~~~~~ @@ -1252,12 +1270,12 @@ API changes Bug fixes ~~~~~~~~~ -.. _section-49: +.. _section-50: 0.21.1 - 2019-04-26 ------------------- -.. _new-features-35: +.. _new-features-36: New features ~~~~~~~~~~~~ @@ -1281,12 +1299,12 @@ Bug fixes - fixed bug in CoxTimeVaryingFitter when ax is provided, thanks @j-i-l! -.. _section-50: +.. _section-51: 0.21.0 - 2019-04-12 ------------------- -.. _new-features-36: +.. _new-features-37: New features ~~~~~~~~~~~~ @@ -1321,12 +1339,12 @@ Bug fixes - Fixed an error that didn’t let users use Numpy arrays in prediction for AFT models -.. _section-51: +.. _section-52: 0.20.5 - 2019-04-08 ------------------- -.. 
_new-features-37: +.. _new-features-38: New features ~~~~~~~~~~~~ @@ -1352,12 +1370,12 @@ Bug fixes test when using strata. - Fixed some plotting bugs with ``AalenJohansenFitter`` -.. _section-52: +.. _section-53: 0.20.4 - 2019-03-27 ------------------- -.. _new-features-38: +.. _new-features-39: New features ~~~~~~~~~~~~ @@ -1385,12 +1403,12 @@ Bug fixes - ``PiecewiseExponentialFitter`` is available with ``from lifelines import *``. -.. _section-53: +.. _section-54: 0.20.3 - 2019-03-23 ------------------- -.. _new-features-39: +.. _new-features-40: New features ~~~~~~~~~~~~ @@ -1403,12 +1421,12 @@ New features ``plot_survival_function`` and ``confidence_interval_survival_function_``. -.. _section-54: +.. _section-55: 0.20.2 - 2019-03-21 ------------------- -.. _new-features-40: +.. _new-features-41: New features ~~~~~~~~~~~~ @@ -1448,7 +1466,7 @@ Bug fixes the q parameter was below the truncation limit. This should have been ``-np.inf`` -.. _section-55: +.. _section-56: 0.20.1 - 2019-03-16 ------------------- @@ -1472,7 +1490,7 @@ API changes This is no longer the case. A 0 will still be added if there is a duration (observed or not) at 0 occurs however. -.. _section-56: +.. _section-57: 0.20.0 - 2019-03-05 ------------------- @@ -1481,7 +1499,7 @@ API changes recent installs where Py3. - Updated minimum dependencies, specifically Matplotlib and Pandas. -.. _new-features-41: +.. _new-features-42: New features ~~~~~~~~~~~~ @@ -1508,12 +1526,12 @@ Bug fixes - Fixed a bug with plotting and ``check_assumptions``. -.. _section-57: +.. _section-58: 0.19.5 - 2019-02-26 ------------------- -.. _new-features-42: +.. _new-features-43: New features ~~~~~~~~~~~~ @@ -1523,7 +1541,7 @@ New features features or categorical variables. - Convergence improvements for AFT models. -.. _section-58: +.. _section-59: 0.19.4 - 2019-02-25 ------------------- @@ -1535,12 +1553,12 @@ Bug fixes - remove some bad print statements in ``CoxPHFitter``. -.. _section-59: +.. 
_section-60: 0.19.3 - 2019-02-25 ------------------- -.. _new-features-43: +.. _new-features-44: New features ~~~~~~~~~~~~ @@ -1552,12 +1570,12 @@ New features - Performance increase to ``print_summary`` in the ``CoxPHFitter`` and ``CoxTimeVaryingFitter`` model. -.. _section-60: +.. _section-61: 0.19.2 - 2019-02-22 ------------------- -.. _new-features-44: +.. _new-features-45: New features ~~~~~~~~~~~~ @@ -1575,12 +1593,12 @@ Bug fixes - Univariate fitters are more flexiable and can allow 2-d and DataFrames as inputs. -.. _section-61: +.. _section-62: 0.19.1 - 2019-02-21 ------------------- -.. _new-features-45: +.. _new-features-46: New features ~~~~~~~~~~~~ @@ -1597,12 +1615,12 @@ API changes ``PiecewiseExponential`` to the same as ``ExponentialFitter`` (from ``\lambda * t`` to ``t / \lambda``). -.. _section-62: +.. _section-63: 0.19.0 - 2019-02-20 ------------------- -.. _new-features-46: +.. _new-features-47: New features ~~~~~~~~~~~~ @@ -1658,7 +1676,7 @@ Bug Fixes models. Thanks @airanmehr! - Fixed some Pandas <0.24 bugs. -.. _section-63: +.. _section-64: 0.18.6 - 2019-02-13 ------------------- @@ -1668,7 +1686,7 @@ Bug Fixes ``rank`` and ``km`` p-values now. - some performance improvements to ``qth_survival_time``. -.. _section-64: +.. _section-65: 0.18.5 - 2019-02-11 ------------------- @@ -1689,7 +1707,7 @@ Bug Fixes that can be used to turn off variance calculations since this can take a long time for large datasets. Thanks @pzivich! -.. _section-65: +.. _section-66: 0.18.4 - 2019-02-10 ------------------- @@ -1699,7 +1717,7 @@ Bug Fixes - adding left-truncation support to parametric univarite models with the ``entry`` kwarg in ``.fit`` -.. _section-66: +.. _section-67: 0.18.3 - 2019-02-07 ------------------- @@ -1709,7 +1727,7 @@ Bug Fixes warnings are more noticeable. - Improved some warning and error messages. -.. _section-67: +.. 
_section-68: 0.18.2 - 2019-02-05 ------------------- @@ -1725,7 +1743,7 @@ Bug Fixes Moved them all (most) to use ``autograd``. - ``LogNormalFitter`` no longer models ``log_sigma``. -.. _section-68: +.. _section-69: 0.18.1 - 2019-02-02 ------------------- @@ -1736,7 +1754,7 @@ Bug Fixes - use the ``autograd`` lib to help with gradients. - New ``LogLogisticFitter`` univariate fitter available. -.. _section-69: +.. _section-70: 0.18.0 - 2019-01-31 ------------------- @@ -1773,7 +1791,7 @@ Bug Fixes ``LinAlgError: Matrix is singular.`` and report back to the user advice. -.. _section-70: +.. _section-71: 0.17.5 - 2019-01-25 ------------------- @@ -1781,7 +1799,7 @@ Bug Fixes - more bugs in ``plot_covariate_groups`` fixed when using non-numeric strata. -.. _section-71: +.. _section-72: 0.17.4 -2019-01-25 ------------------ @@ -1793,7 +1811,7 @@ Bug Fixes - ``groups`` is now called ``values`` in ``CoxPHFitter.plot_covariate_groups`` -.. _section-72: +.. _section-73: 0.17.3 - 2019-01-24 ------------------- @@ -1801,7 +1819,7 @@ Bug Fixes - Fix in ``compute_residuals`` when using ``schoenfeld`` and the minumum duration has only censored subjects. -.. _section-73: +.. _section-74: 0.17.2 2019-01-22 ----------------- @@ -1812,7 +1830,7 @@ Bug Fixes ``for`` loop. The downside is the code is more esoteric now. I’ve added comments as necessary though 🤞 -.. _section-74: +.. _section-75: 0.17.1 - 2019-01-20 ------------------- @@ -1829,7 +1847,7 @@ Bug Fixes - Fixes a Pandas performance warning in ``CoxTimeVaryingFitter``. - Performances improvements to ``CoxTimeVaryingFitter``. -.. _section-75: +.. _section-76: 0.17.0 - 2019-01-11 ------------------- @@ -1850,7 +1868,7 @@ Bug Fixes - some plotting improvemnts to ``plotting.plot_lifetimes`` -.. _section-76: +.. _section-77: 0.16.3 - 2019-01-03 ------------------- @@ -1858,7 +1876,7 @@ Bug Fixes - More ``CoxPHFitter`` performance improvements. Up to a 40% reduction vs 0.16.2 for some datasets. -.. _section-77: +.. 
_section-78: 0.16.2 - 2019-01-02 ------------------- @@ -1869,14 +1887,14 @@ Bug Fixes has lots of duplicate times. See https://github.com/CamDavidsonPilon/lifelines/issues/591 -.. _section-78: +.. _section-79: 0.16.1 - 2019-01-01 ------------------- - Fixed py2 division error in ``concordance`` method. -.. _section-79: +.. _section-80: 0.16.0 - 2019-01-01 ------------------- @@ -1912,7 +1930,7 @@ Bug Fixes ``lifelines.utils.to_episodic_format``. - ``CoxTimeVaryingFitter`` now accepts ``strata``. -.. _section-80: +.. _section-81: 0.15.4 ------ @@ -1920,14 +1938,14 @@ Bug Fixes - bug fix for the Cox model likelihood ratio test when using non-trivial weights. -.. _section-81: +.. _section-82: 0.15.3 - 2018-12-18 ------------------- - Only allow matplotlib less than 3.0. -.. _section-82: +.. _section-83: 0.15.2 - 2018-11-23 ------------------- @@ -1938,7 +1956,7 @@ Bug Fixes - removed ``entry`` from ``ExponentialFitter`` and ``WeibullFitter`` as it was doing nothing. -.. _section-83: +.. _section-84: 0.15.1 - 2018-11-23 ------------------- @@ -1947,7 +1965,7 @@ Bug Fixes - Raise NotImplementedError if the ``robust`` flag is used in ``CoxTimeVaryingFitter`` - that’s not ready yet. -.. _section-84: +.. _section-85: 0.15.0 - 2018-11-22 ------------------- @@ -2018,7 +2036,7 @@ Bug Fixes When Estimating Risks in Pharmacoepidemiology” for a nice overview of the model. -.. _section-85: +.. _section-86: 0.14.6 - 2018-07-02 ------------------- @@ -2026,7 +2044,7 @@ Bug Fixes - fix for n > 2 groups in ``multivariate_logrank_test`` (again). - fix bug for when ``event_observed`` column was not boolean. -.. _section-86: +.. _section-87: 0.14.5 - 2018-06-29 ------------------- @@ -2034,7 +2052,7 @@ Bug Fixes - fix for n > 2 groups in ``multivariate_logrank_test`` - fix weights in KaplanMeierFitter when using a pandas Series. -.. _section-87: +.. 
_section-88: 0.14.4 - 2018-06-14 ------------------- @@ -2051,7 +2069,7 @@ Bug Fixes - New ``delay`` parameter in ``add_covariate_to_timeline`` - removed ``two_sided_z_test`` from ``statistics`` -.. _section-88: +.. _section-89: 0.14.3 - 2018-05-24 ------------------- @@ -2063,7 +2081,7 @@ Bug Fixes - adds a ``column`` argument to ``CoxTimeVaryingFitter`` and ``CoxPHFitter`` ``plot`` method to plot only a subset of columns. -.. _section-89: +.. _section-90: 0.14.2 - 2018-05-18 ------------------- @@ -2071,7 +2089,7 @@ Bug Fixes - some quality of life improvements for working with ``CoxTimeVaryingFitter`` including new ``predict_`` methods. -.. _section-90: +.. _section-91: 0.14.1 - 2018-04-01 ------------------- @@ -2089,7 +2107,7 @@ Bug Fixes faster completion of ``fit`` for large dataframes, and up to 10% faster for small dataframes. -.. _section-91: +.. _section-92: 0.14.0 - 2018-03-03 ------------------- @@ -2111,7 +2129,7 @@ Bug Fixes of a ``RuntimeWarning`` - New checks for complete separation in the dataset for regressions. -.. _section-92: +.. _section-93: 0.13.0 - 2017-12-22 ------------------- @@ -2140,7 +2158,7 @@ Bug Fixes group the same subjects together and give that observation a weight equal to the count. Altogether, this means a much faster regression. -.. _section-93: +.. _section-94: 0.12.0 ------ @@ -2157,7 +2175,7 @@ Bug Fixes - Additional functionality to ``utils.survival_table_from_events`` to bin the index to make the resulting table more readable. -.. _section-94: +.. _section-95: 0.11.3 ------ @@ -2169,7 +2187,7 @@ Bug Fixes observation or censorship. - More accurate prediction methods parametrics univariate models. -.. _section-95: +.. _section-96: 0.11.2 ------ @@ -2177,14 +2195,14 @@ Bug Fixes - Changing liscense to valilla MIT. - Speed up ``NelsonAalenFitter.fit`` considerably. -.. _section-96: +.. _section-97: 0.11.1 - 2017-06-22 ------------------- - Python3 fix for ``CoxPHFitter.plot``. -.. _section-97: +.. 
_section-98: 0.11.0 - 2017-06-21 ------------------- @@ -2198,14 +2216,14 @@ Bug Fixes of a new ``loc`` kwarg. This is to align with Pandas deprecating ``ix`` -.. _section-98: +.. _section-99: 0.10.1 - 2017-06-05 ------------------- - fix in internal normalization for ``CoxPHFitter`` predict methods. -.. _section-99: +.. _section-100: 0.10.0 ------ @@ -2220,7 +2238,7 @@ Bug Fixes mimic R’s ``basehaz`` API. - new ``predict_log_partial_hazards`` to ``CoxPHFitter`` -.. _section-100: +.. _section-101: 0.9.4 ----- @@ -2243,7 +2261,7 @@ Bug Fixes - performance improvements in ``CoxPHFitter`` - should see at least a 10% speed improvement in ``fit``. -.. _section-101: +.. _section-102: 0.9.2 ----- @@ -2252,7 +2270,7 @@ Bug Fixes - throw an error if no admissable pairs in the c-index calculation. Previously a NaN was returned. -.. _section-102: +.. _section-103: 0.9.1 ----- @@ -2260,7 +2278,7 @@ Bug Fixes - add two summary functions to Weibull and Exponential fitter, solves #224 -.. _section-103: +.. _section-104: 0.9.0 ----- @@ -2276,7 +2294,7 @@ Bug Fixes - Default predict method in ``k_fold_cross_validation`` is now ``predict_expectation`` -.. _section-104: +.. _section-105: 0.8.1 - 2015-08-01 ------------------ @@ -2293,7 +2311,7 @@ Bug Fixes - scaling of smooth hazards in NelsonAalenFitter was off by a factor of 0.5. -.. _section-105: +.. _section-106: 0.8.0 ----- @@ -2312,7 +2330,7 @@ Bug Fixes ``lifelines.statistics. power_under_cph``. - fixed a bug when using KaplanMeierFitter for left-censored data. -.. _section-106: +.. _section-107: 0.7.1 ----- @@ -2331,7 +2349,7 @@ Bug Fixes - refactor each fitter into it’s own submodule. For now, the tests are still in the same file. This will also *not* break the API. -.. _section-107: +.. _section-108: 0.7.0 - 2015-03-01 ------------------ @@ -2350,7 +2368,7 @@ Bug Fixes duration remaining until the death event, given survival up until time t. -.. _section-108: +.. 
_section-109: 0.6.1 ----- @@ -2362,7 +2380,7 @@ Bug Fixes your work is to sum up the survival function (for expected values or something similar), it’s more difficult to make a mistake. -.. _section-109: +.. _section-110: 0.6.0 - 2015-02-04 ------------------ @@ -2385,7 +2403,7 @@ Bug Fixes - In ``KaplanMeierFitter``, ``epsilon`` has been renamed to ``precision``. -.. _section-110: +.. _section-111: 0.5.1 - 2014-12-24 ------------------ @@ -2406,7 +2424,7 @@ Bug Fixes ``lifelines.plotting.add_at_risk_counts``. - Fix bug Epanechnikov kernel. -.. _section-111: +.. _section-112: 0.5.0 - 2014-12-07 ------------------ @@ -2419,7 +2437,7 @@ Bug Fixes - add test for summary() - Alternate metrics can be used for ``k_fold_cross_validation``. -.. _section-112: +.. _section-113: 0.4.4 - 2014-11-27 ------------------ @@ -2431,7 +2449,7 @@ Bug Fixes - Fixes bug in 1-d input not returning in CoxPHFitter - Lots of new tests. -.. _section-113: +.. _section-114: 0.4.3 - 2014-07-23 ------------------ @@ -2452,7 +2470,7 @@ Bug Fixes - Adds option ``include_likelihood`` to CoxPHFitter fit method to save the final log-likelihood value. -.. _section-114: +.. _section-115: 0.4.2 - 2014-06-19 ------------------ @@ -2472,7 +2490,7 @@ Bug Fixes from failing so often (this a stop-gap) - pep8 everything -.. _section-115: +.. _section-116: 0.4.1.1 ------- @@ -2485,7 +2503,7 @@ Bug Fixes - Adding more robust cross validation scheme based on issue #67. - fixing ``regression_dataset`` in ``datasets``. -.. _section-116: +.. _section-117: 0.4.1 - 2014-06-11 ------------------ @@ -2504,7 +2522,7 @@ Bug Fixes - Adding a Changelog. - more sanitizing for the statistical tests =) -.. _section-117: +.. 
_section-118: 0.4.0 - 2014-06-08 ------------------ diff --git a/docs/Survival Regression.rst b/docs/Survival Regression.rst index e921f5701..d6b84e9b2 100644 --- a/docs/Survival Regression.rst +++ b/docs/Survival Regression.rst @@ -999,32 +999,6 @@ located under :class:`~lifelines.fitters.aalen_additive_fitter.AalenAdditiveFitt +-----------+--------+----------+--------------+-----------------+---------------------+---------------------------------------------------------+-------------+-------------+----------+--------+--------+ -I'm using the lovely library `Patsy `__ here to create a -design matrix from my original DataFrame. - -.. code:: python - - import patsy - X = patsy.dmatrix('un_continent_name + regime + start_year', data, return_type='dataframe') - X = X.rename(columns={'Intercept': 'baseline'}) - - print(X.columns.tolist()) - - -.. parsed-literal:: - - ['baseline', - 'un_continent_name[T.Americas]', - 'un_continent_name[T.Asia]', - 'un_continent_name[T.Europe]', - 'un_continent_name[T.Oceania]', - 'regime[T.Military Dict]', - 'regime[T.Mixed Dem]', - 'regime[T.Monarchy]', - 'regime[T.Parliamentary Dem]', - 'regime[T.Presidential Dem]', - 'start_year'] - We have also included the ``coef_penalizer`` option. During the estimation, a linear regression is computed at each step. Often the regression can be @@ -1047,7 +1021,7 @@ two individual columns: a *duration* column and a boolean *event occurred* colum .. 
code:: python - aaf.fit(X, 'T', event_col='E') + aaf.fit(X, 'T', event_col='E', formula='un_continent_name + regime + start_year') After fitting, the instance exposes a :attr:`~lifelines.fitters.aalen_additive_fitter.AalenAdditiveFitter.cumulative_hazards_` DataFrame diff --git a/lifelines/fitters/__init__.py b/lifelines/fitters/__init__.py index 49f0c107c..cefe2c891 100644 --- a/lifelines/fitters/__init__.py +++ b/lifelines/fitters/__init__.py @@ -288,6 +288,7 @@ class ParametricUnivariateFitter(UnivariateFitter): _MIN_PARAMETER_VALUE = 1e-9 _scipy_fit_method = "L-BFGS-B" _scipy_fit_options: Dict[str, Any] = dict() + _scipy_fit_callback = None _fitted_parameter_names: List[str] def __init__(self, *args, **kwargs): @@ -547,6 +548,7 @@ def _fit_model(self, Ts, E, entry, weights, show_progress=True): args=(Ts, E, entry, weights), bounds=self._bounds, options=option, + callback=self._scipy_fit_callback, ) previous_results = results @@ -1258,9 +1260,10 @@ def _compute_central_values_of_raw_training_data(self, df, strata=None, name="ba else: from distutils.version import LooseVersion - if LooseVersion(pd.__version__) >= '1.1.0': + + if LooseVersion(pd.__version__) >= "1.1.0": # silence deprecation warning - describe_kwarg = {'datetime_is_numeric': True} + describe_kwarg = {"datetime_is_numeric": True} else: describe_kwarg = {} described = df.describe(include="all", **describe_kwarg) @@ -1317,6 +1320,7 @@ class ParametricRegressionFitter(RegressionFitter): _scipy_fit_method = "BFGS" _scipy_fit_options: Dict[str, Any] = dict() + _scipy_fit_callback = None fit_intercept = False force_no_intercept = False regressors = None @@ -1803,8 +1807,8 @@ def _fit( # https://github.com/CamDavidsonPilon/lifelines/issues/931 assert list(self.regressors.keys()) == list(self._norm_std.index.get_level_values(0).unique()) _params = np.concatenate([_params[k] for k in self.regressors.keys()]) - self.params_ = _params / self._norm_std + self.params_ = _params / self._norm_std 
self.variance_matrix_ = pd.DataFrame(self._compute_variance_matrix(), index=_index, columns=_index) self.standard_errors_ = self._compute_standard_errors(Ts, E.values, weights.values, entries.values, Xs) self.confidence_intervals_ = self._compute_confidence_intervals() @@ -1902,6 +1906,7 @@ def _fit_model(self, likelihood, Ts, Xs, E, weights, entries, show_progress=Fals jac=True, args=(Ts, E, weights, entries, utils.DataframeSlicer(Xs)), options={**{"disp": show_progress}, **self._scipy_fit_options}, + callback=self._scipy_fit_callback, ) if results.fun < minimum_ll: @@ -2704,7 +2709,7 @@ def fit( diagnostics. Useful if convergence is failing. formula: string - Use an R-style formula for modeling the dataset. See formula syntax: https://patsy.readthedocs.io/en/latest/quickstart.html + Use an R-style formula for modeling the dataset. See formula syntax: https://matthewwardrop.github.io/formulaic/basic/grammar/ ancillary: None, boolean, str, or DataFrame, optional (default=None) Choose to model the ancillary parameters. @@ -2859,7 +2864,7 @@ def fit_interval_censoring( observation. If left as None, will be inferred from the start and stop columns (lower_bound==upper_bound means uncensored) formula: string - Use an R-style formula for modeling the dataset. See formula syntax: https://patsy.readthedocs.io/en/latest/quickstart.html + Use an R-style formula for modeling the dataset. See formula syntax: https://matthewwardrop.github.io/formulaic/basic/grammar/ ancillary: None, boolean, str, or DataFrame, optional (default=None) Choose to model the ancillary parameters. @@ -3036,7 +3041,7 @@ def fit_left_censoring( observation. If left as None, assume all individuals are uncensored. formula: string - Use an R-style formula for modeling the dataset. See formula syntax: https://patsy.readthedocs.io/en/latest/quickstart.html + Use an R-style formula for modeling the dataset. 
See formula syntax: https://matthewwardrop.github.io/formulaic/basic/grammar/ ancillary: None, boolean, str, or DataFrame, optional (default=None) Choose to model the ancillary parameters. diff --git a/lifelines/tests/test_estimation.py b/lifelines/tests/test_estimation.py index 205e0ceee..695424a72 100644 --- a/lifelines/tests/test_estimation.py +++ b/lifelines/tests/test_estimation.py @@ -1911,7 +1911,6 @@ def test_all_models_have_regressors_property(self, rossi, regression_models): fitter.fit(rossi, "week", "arrest") assert hasattr(fitter, "regressors") - @pytest.mark.xfail def test_pickle_serialization(self, rossi, regression_models): for fitter in regression_models: fitter.fit(rossi, "week", "arrest") @@ -1920,7 +1919,6 @@ def test_pickle_serialization(self, rossi, regression_models): dif = (fitter.durations - unpickled.durations).sum() assert dif == 0 - @pytest.mark.xfail def test_dill_serialization(self, rossi, regression_models): from dill import dumps, loads @@ -1931,7 +1929,6 @@ def test_dill_serialization(self, rossi, regression_models): dif = (fitter.durations - unpickled.durations).sum() assert dif == 0 - @pytest.mark.xfail def test_joblib_serialization(self, rossi, regression_models): from joblib import dump, load diff --git a/lifelines/tests/utils/test_utils.py b/lifelines/tests/utils/test_utils.py index f7116ff78..a68d61838 100644 --- a/lifelines/tests/utils/test_utils.py +++ b/lifelines/tests/utils/test_utils.py @@ -924,7 +924,6 @@ def test_we_can_user_other_prediction_methods(self, X, Y): wf.fit(X, Y) assert wf.predict(X).shape[0] == X.shape[0] - @pytest.mark.xfail def test_dill(self, X, Y): import dill @@ -936,7 +935,6 @@ def test_dill(self, X, Y): s = dill.loads(s) assert cph.predict(X).shape[0] == X.shape[0] - @pytest.mark.xfail def test_pickle(self, X, Y): import pickle @@ -971,7 +969,6 @@ def test_sklearn_GridSearchCV_accept_model_with_parallelization(self, X, Y): assert clf.best_params_ == {"l1_ratio": 0.5, "model_ancillary": False, 
"penalizer": 0.01} assert clf.predict(X).shape[0] == X.shape[0] - @pytest.mark.xfail def test_joblib(self, X, Y): from joblib import dump, load diff --git a/lifelines/utils/__init__.py b/lifelines/utils/__init__.py index 97acaafd2..d8b4d582b 100644 --- a/lifelines/utils/__init__.py +++ b/lifelines/utils/__init__.py @@ -1894,12 +1894,14 @@ def add_intercept_col(cls, df): def transform_df(self, df: pd.DataFrame): - import patsy + import formulaic Xs = {} for param_name, transform in self.mappings.items(): - if isinstance(transform, patsy.design_info.DesignInfo): - (X,) = patsy.build_design_matrices([transform], df, return_type="dataframe") + if isinstance(transform, formulaic.formula.Formula): + index = df.index + X = transform.get_model_matrix(df) + X.index = index elif isinstance(transform, list): if self.force_intercept: df = self.add_intercept_col(df) @@ -1941,11 +1943,11 @@ def _list_seed_transform(self, list_: List): def _string_seed_transform(self, formula: str, df: pd.DataFrame): # user input a formula, hopefully - import patsy + import formulaic if self.force_intercept: formula += "+ 1" - _X = patsy.dmatrix(formula, df, 1, NA_action="raise") + design_info = formulaic.Formula(formula) - return _X.design_info + return design_info diff --git a/lifelines/version.py b/lifelines/version.py index bd5e9b1bf..54acb8a2d 100644 --- a/lifelines/version.py +++ b/lifelines/version.py @@ -1,4 +1,4 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -__version__ = "0.25.7" +__version__ = "0.25.8" diff --git a/reqs/base-requirements.txt b/reqs/base-requirements.txt index bd185bc1b..40bc5c80e 100644 --- a/reqs/base-requirements.txt +++ b/reqs/base-requirements.txt @@ -4,4 +4,4 @@ pandas>=0.23.0 matplotlib>=3.0 autograd>=1.3 autograd-gamma>=0.3 -patsy>=0.5.0 +formulaic