From 2d52a96c50f590631ec78e7ec17549a93b1bf6a1 Mon Sep 17 00:00:00 2001
From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com>
Date: Wed, 4 Sep 2024 12:02:55 -0400
Subject: [PATCH 01/10] added negative binomial file

---
 ngboost/distns/negative_binomial.py | 73 +++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 ngboost/distns/negative_binomial.py

diff --git a/ngboost/distns/negative_binomial.py b/ngboost/distns/negative_binomial.py
new file mode 100644
index 0000000..102bd8a
--- /dev/null
+++ b/ngboost/distns/negative_binomial.py
@@ -0,0 +1,73 @@
+"""The NGBoost NegativeBinomial distribution and scores"""
+import numpy as np
+from scipy.stats import nbinom as dist
+from scipy.special import digamma
+from scipy.optimize import Bounds, minimize
+import warnings
+
+from ngboost.distns.distn import RegressionDistn
+from ngboost.scores import LogScore
+
+#helper function because scipy doesn't provide a fit function natively
+def negative_log_likelihood(params,k):
+    return -dist.logpmf(k = k, n = params[0], p = params[1]).sum()
+
+class NegativeBinomialLogScore(LogScore):
+
+    def score(self, Y):
+        return -self.dist.logpmf(Y)
+    
+    def d_score(self, Y):
+        D = np.zeros((len(Y),2))
+        D[:,0] = -self.n * (digamma(Y + self.n) + np.log(self.p) - digamma(self.n))
+        D[:,1] = (Y * np.exp(self.z) - self.n)/(np.exp(self.z) + 1)
+        return D
+    
+    def metric(self):
+        FI = np.zeros((self.n.shape[0], 2, 2))
+        FI[:, 0, 0] = (self.n * self.p)/(self.p + 1)
+        FI[:, 1, 1] = self.n * self.p
+        return FI        
+
+class NegativeBinomial(RegressionDistn):
+
+    n_params = 2
+    scores = [NegativeBinomialLogScore]
+
+    def __init__(self,params):
+
+        self.logn = params[0]
+        self.n = np.exp(self.logn)
+        #z = log(p/(1-p)) => p = 1/(1 + e^(-z))
+        self.z = params[1]
+        self.p = 1/(1 + np.exp(-self.z))
+        self.dist = dist(n = self.n, p = self.p)
+
+    def fit(Y):
+        assert np.equal(
+            np.mod(Y, 1), 0
+        ).all(), "All Negative Binomial target data must be discrete integers"
+        assert np.all([y >= 0 for y in Y]), "Count data must be >= 0"
+
+        m = minimize(
+            negative_log_likelihood,
+            x0=np.array([np.max(Y),.5]),  # initialized value
+            args=(Y,),
+            bounds=Bounds((1e-8,1e-8),(np.inf,1-1e-8)),
+
+        )
+        return np.array([np.log(m.x[0]), np.log(m.x[1]/(1 - m.x[1]))])
+    
+    def sample(self,m):
+        return np.array([self.dist.rvs() for i in range(m)])
+
+    def __getattr__(
+        self, name
+    ):  # gives us access to NegativeBinomial.mean() required for RegressionDistn.predict()
+        if name in dir(self.dist):
+            return getattr(self.dist, name)
+        return None
+    
+    @property
+    def params(self):
+        return {'n':self.n, 'p':self.p}
\ No newline at end of file

From 2bccf2b176fe7c805d178f361606f63c293dd54c Mon Sep 17 00:00:00 2001
From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com>
Date: Wed, 4 Sep 2024 12:03:40 -0400
Subject: [PATCH 02/10] added negative binomial to init

---
 ngboost/distns/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ngboost/distns/__init__.py b/ngboost/distns/__init__.py
index 05e9412..11e96dc 100644
--- a/ngboost/distns/__init__.py
+++ b/ngboost/distns/__init__.py
@@ -7,6 +7,7 @@
 from .laplace import Laplace
 from .lognormal import LogNormal
 from .multivariate_normal import MultivariateNormal
+from .negative_binomial import NegativeBinomial
 from .normal import Normal, NormalFixedMean, NormalFixedVar
 from .poisson import Poisson
 from .t import T, TFixedDf, TFixedDfFixedVar
@@ -23,6 +24,7 @@
     "Laplace",
     "LogNormal",
     "MultivariateNormal",
+    "NegativeBinomial",
     "Normal",
     "NormalFixedMean",
     "NormalFixedVar",

From 58d04f2868814705c37f4db91df9723595ac403a Mon Sep 17 00:00:00 2001
From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com>
Date: Wed, 4 Sep 2024 12:04:28 -0400
Subject: [PATCH 03/10] added negative binomial score testing

---
 tests/test_score.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_score.py b/tests/test_score.py
index 3d291a5..941e4a6 100644
--- a/tests/test_score.py
+++ b/tests/test_score.py
@@ -11,6 +11,7 @@
     Gamma,
     Laplace,
     MultivariateNormal,
+    NegativeBinomial,
     Normal,
     Poisson,
     T,
@@ -100,6 +101,7 @@ def idfn(dist_score: DistScore):
     (Laplace, LogScore),
     (Poisson, LogScore),
     (Gamma, LogScore),
+    (NegativeBinomial,LogScore),
 ] + [(MultivariateNormal(i), LogScore) for i in range(2, 5)]
 # Fill in the dist, score pair to test the gradient
 # Tests all in TEST_METRIC by default

From 0666534cba0971583e3fb1d486edf766d5dadc68 Mon Sep 17 00:00:00 2001
From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com>
Date: Wed, 4 Sep 2024 12:06:08 -0400
Subject: [PATCH 04/10] remove warnings import

---
 ngboost/distns/negative_binomial.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ngboost/distns/negative_binomial.py b/ngboost/distns/negative_binomial.py
index 102bd8a..cceb0ed 100644
--- a/ngboost/distns/negative_binomial.py
+++ b/ngboost/distns/negative_binomial.py
@@ -3,7 +3,6 @@
 from scipy.stats import nbinom as dist
 from scipy.special import digamma
 from scipy.optimize import Bounds, minimize
-import warnings
 
 from ngboost.distns.distn import RegressionDistn
 from ngboost.scores import LogScore

From 5e013f85c95e7777869c6cf69a1ec453cdb0552f Mon Sep 17 00:00:00 2001
From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com>
Date: Wed, 4 Sep 2024 12:22:06 -0400
Subject: [PATCH 05/10] saved params to instance attribute

---
 ngboost/distns/negative_binomial.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ngboost/distns/negative_binomial.py b/ngboost/distns/negative_binomial.py
index cceb0ed..ea39740 100644
--- a/ngboost/distns/negative_binomial.py
+++ b/ngboost/distns/negative_binomial.py
@@ -34,6 +34,8 @@ class NegativeBinomial(RegressionDistn):
     scores = [NegativeBinomialLogScore]
 
     def __init__(self,params):
+        # save the parameters
+        self._params = params
 
         self.logn = params[0]
         self.n = np.exp(self.logn)

From c23bdaacff2e911555acddf241aa8b0f06408ff9 Mon Sep 17 00:00:00 2001
From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com>
Date: Wed, 4 Sep 2024 17:25:59 -0400
Subject: [PATCH 06/10] disable super init not called

---
 ngboost/distns/negative_binomial.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ngboost/distns/negative_binomial.py b/ngboost/distns/negative_binomial.py
index ea39740..57bc97a 100644
--- a/ngboost/distns/negative_binomial.py
+++ b/ngboost/distns/negative_binomial.py
@@ -33,6 +33,7 @@ class NegativeBinomial(RegressionDistn):
     n_params = 2
     scores = [NegativeBinomialLogScore]
 
+    # pylint: disable=super-init-not-called
     def __init__(self,params):
         # save the parameters
         self._params = params

From d95ce01253352b97d953b64a125b668935a502b3 Mon Sep 17 00:00:00 2001
From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com>
Date: Wed, 11 Sep 2024 10:01:50 -0400
Subject: [PATCH 07/10] lint formatting

---
 ngboost/distns/negative_binomial.py | 46 ++++++++++++++---------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/ngboost/distns/negative_binomial.py b/ngboost/distns/negative_binomial.py
index 57bc97a..267dc52 100644
--- a/ngboost/distns/negative_binomial.py
+++ b/ngboost/distns/negative_binomial.py
@@ -7,26 +7,27 @@
 from ngboost.distns.distn import RegressionDistn
 from ngboost.scores import LogScore
 
-#helper function because scipy doesn't provide a fit function natively
-def negative_log_likelihood(params,k):
-    return -dist.logpmf(k = k, n = params[0], p = params[1]).sum()
+# helper function because scipy doesn't provide a fit function natively
+def negative_log_likelihood(params, k):
+    return -dist.logpmf(k=k, n=params[0], p=params[1]).sum()
 
-class NegativeBinomialLogScore(LogScore):
 
+class NegativeBinomialLogScore(LogScore):
     def score(self, Y):
         return -self.dist.logpmf(Y)
-    
+
     def d_score(self, Y):
-        D = np.zeros((len(Y),2))
-        D[:,0] = -self.n * (digamma(Y + self.n) + np.log(self.p) - digamma(self.n))
-        D[:,1] = (Y * np.exp(self.z) - self.n)/(np.exp(self.z) + 1)
+        D = np.zeros((len(Y), 2))
+        D[:, 0] = -self.n * (digamma(Y + self.n) + np.log(self.p) - digamma(self.n))
+        D[:, 1] = (Y * np.exp(self.z) - self.n) / (np.exp(self.z) + 1)
         return D
-    
+
     def metric(self):
         FI = np.zeros((self.n.shape[0], 2, 2))
-        FI[:, 0, 0] = (self.n * self.p)/(self.p + 1)
+        FI[:, 0, 0] = (self.n * self.p) / (self.p + 1)
         FI[:, 1, 1] = self.n * self.p
-        return FI        
+        return FI
+
 
 class NegativeBinomial(RegressionDistn):
 
@@ -34,16 +35,16 @@ class NegativeBinomial(RegressionDistn):
     scores = [NegativeBinomialLogScore]
 
     # pylint: disable=super-init-not-called
-    def __init__(self,params):
+    def __init__(self, params):
         # save the parameters
         self._params = params
 
         self.logn = params[0]
         self.n = np.exp(self.logn)
-        #z = log(p/(1-p)) => p = 1/(1 + e^(-z))
+        # z = log(p/(1-p)) => p = 1/(1 + e^(-z))
         self.z = params[1]
-        self.p = 1/(1 + np.exp(-self.z))
-        self.dist = dist(n = self.n, p = self.p)
+        self.p = 1 / (1 + np.exp(-self.z))
+        self.dist = dist(n=self.n, p=self.p)
 
     def fit(Y):
         assert np.equal(
@@ -53,14 +54,13 @@ def fit(Y):
 
         m = minimize(
             negative_log_likelihood,
-            x0=np.array([np.max(Y),.5]),  # initialized value
+            x0=np.array([np.max(Y), 0.5]),  # initialized value
             args=(Y,),
-            bounds=Bounds((1e-8,1e-8),(np.inf,1-1e-8)),
-
+            bounds=Bounds((1e-8, 1e-8), (np.inf, 1 - 1e-8)),
         )
-        return np.array([np.log(m.x[0]), np.log(m.x[1]/(1 - m.x[1]))])
-    
-    def sample(self,m):
+        return np.array([np.log(m.x[0]), np.log(m.x[1] / (1 - m.x[1]))])
+
+    def sample(self, m):
         return np.array([self.dist.rvs() for i in range(m)])
 
     def __getattr__(
@@ -69,7 +69,7 @@ def __getattr__(
         if name in dir(self.dist):
             return getattr(self.dist, name)
         return None
-    
+
     @property
     def params(self):
-        return {'n':self.n, 'p':self.p}
\ No newline at end of file
+        return {"n": self.n, "p": self.p}

From 14b715269661ffd406d59e2419ef7d5220fc23c4 Mon Sep 17 00:00:00 2001
From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com>
Date: Wed, 11 Sep 2024 10:02:14 -0400
Subject: [PATCH 08/10] more formatting

---
 tests/test_score.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_score.py b/tests/test_score.py
index 941e4a6..c67b4ec 100644
--- a/tests/test_score.py
+++ b/tests/test_score.py
@@ -101,7 +101,7 @@ def idfn(dist_score: DistScore):
     (Laplace, LogScore),
     (Poisson, LogScore),
     (Gamma, LogScore),
-    (NegativeBinomial,LogScore),
+    (NegativeBinomial, LogScore),
 ] + [(MultivariateNormal(i), LogScore) for i in range(2, 5)]
 # Fill in the dist, score pair to test the gradient
 # Tests all in TEST_METRIC by default

From 21e02d3b909b0e93c650a34d9e9d6ab420cdb8fc Mon Sep 17 00:00:00 2001
From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com>
Date: Wed, 11 Sep 2024 10:22:48 -0400
Subject: [PATCH 09/10] isort changes

---
 ngboost/distns/negative_binomial.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ngboost/distns/negative_binomial.py b/ngboost/distns/negative_binomial.py
index 267dc52..bb204ba 100644
--- a/ngboost/distns/negative_binomial.py
+++ b/ngboost/distns/negative_binomial.py
@@ -1,12 +1,13 @@
 """The NGBoost NegativeBinomial distribution and scores"""
 import numpy as np
-from scipy.stats import nbinom as dist
-from scipy.special import digamma
 from scipy.optimize import Bounds, minimize
+from scipy.special import digamma
+from scipy.stats import nbinom as dist
 
 from ngboost.distns.distn import RegressionDistn
 from ngboost.scores import LogScore
 
+
 # helper function because scipy doesn't provide a fit function natively
 def negative_log_likelihood(params, k):
     return -dist.logpmf(k=k, n=params[0], p=params[1]).sum()

From 3f0786fa0b46e28535f6deef53501d432ab6e9c1 Mon Sep 17 00:00:00 2001
From: ashler-herrick <124288133+ashler-herrick@users.noreply.github.com>
Date: Wed, 11 Sep 2024 17:42:54 -0400
Subject: [PATCH 10/10] update Fisher information metric: add off-diagonal terms, set FI[1, 1] to n * (1 - p)

---
 ngboost/distns/negative_binomial.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ngboost/distns/negative_binomial.py b/ngboost/distns/negative_binomial.py
index bb204ba..ada885a 100644
--- a/ngboost/distns/negative_binomial.py
+++ b/ngboost/distns/negative_binomial.py
@@ -26,7 +26,9 @@ def d_score(self, Y):
     def metric(self):
         FI = np.zeros((self.n.shape[0], 2, 2))
         FI[:, 0, 0] = (self.n * self.p) / (self.p + 1)
-        FI[:, 1, 1] = self.n * self.p
+        FI[:, 1, 0] = (self.p - 1) * self.n
+        FI[:, 0, 1] = (self.p - 1) * self.n
+        FI[:, 1, 1] = self.n * (1 - self.p)
         return FI