From 2d52a96c50f590631ec78e7ec17549a93b1bf6a1 Mon Sep 17 00:00:00 2001 From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com> Date: Wed, 4 Sep 2024 12:02:55 -0400 Subject: [PATCH 01/10] added negative binomial file --- ngboost/distns/negative_binomial.py | 73 +++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 ngboost/distns/negative_binomial.py diff --git a/ngboost/distns/negative_binomial.py b/ngboost/distns/negative_binomial.py new file mode 100644 index 0000000..102bd8a --- /dev/null +++ b/ngboost/distns/negative_binomial.py @@ -0,0 +1,73 @@ +"""The NGBoost NegativeBinomial distribution and scores""" +import numpy as np +from scipy.stats import nbinom as dist +from scipy.special import digamma +from scipy.optimize import Bounds, minimize +import warnings + +from ngboost.distns.distn import RegressionDistn +from ngboost.scores import LogScore + +#helper function because scipy doesn't provide a fit function natively +def negative_log_likelihood(params,k): + return -dist.logpmf(k = k, n = params[0], p = params[1]).sum() + +class NegativeBinomialLogScore(LogScore): + + def score(self, Y): + return -self.dist.logpmf(Y) + + def d_score(self, Y): + D = np.zeros((len(Y),2)) + D[:,0] = -self.n * (digamma(Y + self.n) + np.log(self.p) - digamma(self.n)) + D[:,1] = (Y * np.exp(self.z) - self.n)/(np.exp(self.z) + 1) + return D + + def metric(self): + FI = np.zeros((self.n.shape[0], 2, 2)) + FI[:, 0, 0] = (self.n * self.p)/(self.p + 1) + FI[:, 1, 1] = self.n * self.p + return FI + +class NegativeBinomial(RegressionDistn): + + n_params = 2 + scores = [NegativeBinomialLogScore] + + def __init__(self,params): + + self.logn = params[0] + self.n = np.exp(self.logn) + #z = log(p/(1-p)) => p = 1/(1 + e^(-z)) + self.z = params[1] + self.p = 1/(1 + np.exp(-self.z)) + self.dist = dist(n = self.n, p = self.p) + + def fit(Y): + assert np.equal( + np.mod(Y, 1), 0 + ).all(), "All Negative Binomial target data must be discrete integers" + assert np.all([y >= 0 for y in Y]), "Count data must be >= 0" + + m = minimize( + negative_log_likelihood, + x0=np.array([np.max(Y),.5]), # initialized value + args=(Y,), + bounds=Bounds((1e-8,1e-8),(np.inf,1-1e-8)), + + ) + return np.array([np.log(m.x[0]), np.log(m.x[1]/(1 - m.x[1]))]) + + def sample(self,m): + return np.array([self.dist.rvs() for i in range(m)]) + + def __getattr__( + self, name + ): # gives us access to NegativeBinomial.mean() required for RegressionDist.predict() + if name in dir(self.dist): + return getattr(self.dist, name) + return None + + @property + def params(self): + return {'n':self.n, 'p':self.p} \ No newline at end of file From 2bccf2b176fe7c805d178f361606f63c293dd54c Mon Sep 17 00:00:00 2001 From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com> Date: Wed, 4 Sep 2024 12:03:40 -0400 Subject: [PATCH 02/10] added negative binomial to init --- ngboost/distns/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ngboost/distns/__init__.py b/ngboost/distns/__init__.py index 05e9412..11e96dc 100644 --- a/ngboost/distns/__init__.py +++ b/ngboost/distns/__init__.py @@ -7,6 +7,7 @@ from .laplace import Laplace from .lognormal import LogNormal from .multivariate_normal import MultivariateNormal +from .negative_binomial import NegativeBinomial from .normal import Normal, NormalFixedMean, NormalFixedVar from .poisson import Poisson from .t import T, TFixedDf, TFixedDfFixedVar @@ -23,6 +24,7 @@ "Laplace", "LogNormal", "MultivariateNormal", + "NegativeBinomial", "Normal", "NormalFixedMean", "NormalFixedVar", From 58d04f2868814705c37f4db91df9723595ac403a Mon Sep 17 00:00:00 2001 From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com> Date: Wed, 4 Sep 2024 12:04:28 -0400 Subject: [PATCH 03/10] added negative binomial score testing --- tests/test_score.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_score.py b/tests/test_score.py index 3d291a5..941e4a6 100644 --- a/tests/test_score.py +++ b/tests/test_score.py @@ -11,6 +11,7 @@ Gamma, Laplace, MultivariateNormal, + NegativeBinomial, Normal, Poisson, T, @@ -100,6 +101,7 @@ def idfn(dist_score: DistScore): (Laplace, LogScore), (Poisson, LogScore), (Gamma, LogScore), + (NegativeBinomial,LogScore), ] + [(MultivariateNormal(i), LogScore) for i in range(2, 5)] # Fill in the dist, score pair to test the gradient # Tests all in TEST_METRIC by default From 0666534cba0971583e3fb1d486edf766d5dadc68 Mon Sep 17 00:00:00 2001 From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com> Date: Wed, 4 Sep 2024 12:06:08 -0400 Subject: [PATCH 04/10] remove warnings import --- ngboost/distns/negative_binomial.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ngboost/distns/negative_binomial.py b/ngboost/distns/negative_binomial.py index 102bd8a..cceb0ed 100644 --- a/ngboost/distns/negative_binomial.py +++ b/ngboost/distns/negative_binomial.py @@ -3,7 +3,6 @@ from scipy.stats import nbinom as dist from scipy.special import digamma from scipy.optimize import Bounds, minimize -import warnings from ngboost.distns.distn import RegressionDistn from ngboost.scores import LogScore From 5e013f85c95e7777869c6cf69a1ec453cdb0552f Mon Sep 17 00:00:00 2001 From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com> Date: Wed, 4 Sep 2024 12:22:06 -0400 Subject: [PATCH 05/10] saved params to class variable --- ngboost/distns/negative_binomial.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ngboost/distns/negative_binomial.py b/ngboost/distns/negative_binomial.py index cceb0ed..ea39740 100644 --- a/ngboost/distns/negative_binomial.py +++ b/ngboost/distns/negative_binomial.py @@ -34,6 +34,8 @@ class NegativeBinomial(RegressionDistn): scores = [NegativeBinomialLogScore] def __init__(self,params): + # save the parameters + self._params = params self.logn = params[0] self.n = np.exp(self.logn) From c23bdaacff2e911555acddf241aa8b0f06408ff9 Mon Sep 17 00:00:00 2001 From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com> Date: Wed, 4 Sep 2024 17:25:59 -0400 Subject: [PATCH 06/10] disable super init not called --- ngboost/distns/negative_binomial.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ngboost/distns/negative_binomial.py b/ngboost/distns/negative_binomial.py index ea39740..57bc97a 100644 --- a/ngboost/distns/negative_binomial.py +++ b/ngboost/distns/negative_binomial.py @@ -33,6 +33,7 @@ class NegativeBinomial(RegressionDistn): n_params = 2 scores = [NegativeBinomialLogScore] + # pylint: disable=super-init-not-called def __init__(self,params): # save the parameters self._params = params From d95ce01253352b97d953b64a125b668935a502b3 Mon Sep 17 00:00:00 2001 From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com> Date: Wed, 11 Sep 2024 10:01:50 -0400 Subject: [PATCH 07/10] lint formatting --- ngboost/distns/negative_binomial.py | 46 ++++++++++++++--------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/ngboost/distns/negative_binomial.py b/ngboost/distns/negative_binomial.py index 57bc97a..267dc52 100644 --- a/ngboost/distns/negative_binomial.py +++ b/ngboost/distns/negative_binomial.py @@ -7,26 +7,27 @@ from ngboost.distns.distn import RegressionDistn from ngboost.scores import LogScore -#helper function because scipy doesn't provide a fit function natively -def negative_log_likelihood(params,k): - return -dist.logpmf(k = k, n = params[0], p = params[1]).sum() +# helper function because scipy doesn't provide a fit function natively +def negative_log_likelihood(params, k): + return -dist.logpmf(k=k, n=params[0], p=params[1]).sum() -class NegativeBinomialLogScore(LogScore): +class NegativeBinomialLogScore(LogScore): def score(self, Y): return -self.dist.logpmf(Y) - + def d_score(self, Y): - D = np.zeros((len(Y),2)) - D[:,0] = -self.n * (digamma(Y + self.n) + np.log(self.p) - digamma(self.n)) - D[:,1] = (Y * np.exp(self.z) - self.n)/(np.exp(self.z) + 1) + D = np.zeros((len(Y), 2)) + D[:, 0] = -self.n * (digamma(Y + self.n) + np.log(self.p) - digamma(self.n)) + D[:, 1] = (Y * np.exp(self.z) - self.n) / (np.exp(self.z) + 1) return D - + def metric(self): FI = np.zeros((self.n.shape[0], 2, 2)) - FI[:, 0, 0] = (self.n * self.p)/(self.p + 1) + FI[:, 0, 0] = (self.n * self.p) / (self.p + 1) FI[:, 1, 1] = self.n * self.p - return FI + return FI + class NegativeBinomial(RegressionDistn): @@ -34,16 +35,16 @@ class NegativeBinomial(RegressionDistn): scores = [NegativeBinomialLogScore] # pylint: disable=super-init-not-called - def __init__(self,params): + def __init__(self, params): # save the parameters self._params = params self.logn = params[0] self.n = np.exp(self.logn) - #z = log(p/(1-p)) => p = 1/(1 + e^(-z)) + # z = log(p/(1-p)) => p = 1/(1 + e^(-z)) self.z = params[1] - self.p = 1/(1 + np.exp(-self.z)) - self.dist = dist(n = self.n, p = self.p) + self.p = 1 / (1 + np.exp(-self.z)) + self.dist = dist(n=self.n, p=self.p) def fit(Y): assert np.equal( @@ -53,14 +54,13 @@ def fit(Y): m = minimize( negative_log_likelihood, - x0=np.array([np.max(Y),.5]), # initialized value + x0=np.array([np.max(Y), 0.5]), # initialized value args=(Y,), - bounds=Bounds((1e-8,1e-8),(np.inf,1-1e-8)), - + bounds=Bounds((1e-8, 1e-8), (np.inf, 1 - 1e-8)), ) - return np.array([np.log(m.x[0]), np.log(m.x[1]/(1 - m.x[1]))]) - - def sample(self,m): + return np.array([np.log(m.x[0]), np.log(m.x[1] / (1 - m.x[1]))]) + + def sample(self, m): return np.array([self.dist.rvs() for i in range(m)]) def __getattr__( @@ -69,7 +69,7 @@ def __getattr__( if name in dir(self.dist): return getattr(self.dist, name) return None - + @property def params(self): - return {'n':self.n, 'p':self.p} \ No newline at end of file + return {"n": self.n, "p": self.p} From 14b715269661ffd406d59e2419ef7d5220fc23c4 Mon Sep 17 00:00:00 2001 From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com> Date: Wed, 11 Sep 2024 10:02:14 -0400 Subject: [PATCH 08/10] more formatting --- tests/test_score.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_score.py b/tests/test_score.py index 941e4a6..c67b4ec 100644 --- a/tests/test_score.py +++ b/tests/test_score.py @@ -101,7 +101,7 @@ def idfn(dist_score: DistScore): (Laplace, LogScore), (Poisson, LogScore), (Gamma, LogScore), - (NegativeBinomial,LogScore), + (NegativeBinomial, LogScore), ] + [(MultivariateNormal(i), LogScore) for i in range(2, 5)] # Fill in the dist, score pair to test the gradient # Tests all in TEST_METRIC by default From 21e02d3b909b0e93c650a34d9e9d6ab420cdb8fc Mon Sep 17 00:00:00 2001 From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com> Date: Wed, 11 Sep 2024 10:22:48 -0400 Subject: [PATCH 09/10] isort changes --- ngboost/distns/negative_binomial.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ngboost/distns/negative_binomial.py b/ngboost/distns/negative_binomial.py index 267dc52..bb204ba 100644 --- a/ngboost/distns/negative_binomial.py +++ b/ngboost/distns/negative_binomial.py @@ -1,12 +1,13 @@ """The NGBoost NegativeBinomial distribution and scores""" import numpy as np -from scipy.stats import nbinom as dist -from scipy.special import digamma from scipy.optimize import Bounds, minimize +from scipy.special import digamma +from scipy.stats import nbinom as dist from ngboost.distns.distn import RegressionDistn from ngboost.scores import LogScore + # helper function because scipy doesn't provide a fit function natively def negative_log_likelihood(params, k): return -dist.logpmf(k=k, n=params[0], p=params[1]).sum() From 3f0786fa0b46e28535f6deef53501d432ab6e9c1 Mon Sep 17 00:00:00 2001 From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com> Date: Wed, 11 Sep 2024 17:42:54 -0400 Subject: [PATCH 10/10] formatting --- ngboost/distns/negative_binomial.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ngboost/distns/negative_binomial.py b/ngboost/distns/negative_binomial.py index bb204ba..ada885a 100644 --- a/ngboost/distns/negative_binomial.py +++ b/ngboost/distns/negative_binomial.py @@ -26,7 +26,9 @@ def d_score(self, Y): def metric(self): FI = np.zeros((self.n.shape[0], 2, 2)) FI[:, 0, 0] = (self.n * self.p) / (self.p + 1) - FI[:, 1, 1] = self.n * self.p + FI[:, 1, 0] = (self.p - 1) * self.n + FI[:, 0, 1] = (self.p - 1) * self.n + FI[:, 1, 1] = self.n * (1 - self.p) return FI