Add theta_to_scipy_params and unify distribution methods (pdf, cdf, etc.)
simon-hirsch · Feb 26, 2025 · e1260ba · e1260ba
1 parent 113c78e
commit e1260ba
Show file tree

Hide file tree

Showing 5 changed files with 57 additions and 69 deletions.
diff --git a/src/rolch/base/distribution.py b/src/rolch/base/distribution.py
@@ -24,6 +24,18 @@ def parameter_support(self) -> dict:
         """The support of each parameter of the distribution."""
         pass
 
+    @property
+    @abstractmethod
+    def scipy_parameters(self) -> Tuple[str]:
+        """Mapping from each scipy.stats parameter name to the index of its corresponding column in theta."""
+        pass
+
+    def theta_to_scipy_params(self, theta: np.ndarray) -> dict:
+        params = {}
+        for name, index in self.scipy_parameters.items():
+            params[name] = theta[:, index]
+        return params
+
     @abstractmethod
     def theta_to_params(self, theta: np.ndarray) -> Tuple:
         """Take the fitted values and return tuple of vectors for distribution parameters."""

diff --git a/src/rolch/distributions/gamma.py b/src/rolch/distributions/gamma.py
@@ -23,7 +23,7 @@ class DistributionGamma(Distribution):
 
         This parameterization is different to the `scipy.stats.gamma(alpha, loc, scale)` parameterization.
 
-        We can use `DistributionGamma().gamlss_to_scipy(mu, sigma)` to map the distribution parameters to scipy.
+        We can use `DistributionGamma().theta_to_scipy_params(theta)` to map the distribution parameters to scipy.
 
     The `scipy.stats.gamma()` distribution is defined as:
     $$
@@ -54,37 +54,35 @@ def __init__(
         self.corresponding_gamlss: str = "GA"
         self.scipy_dist: st.rv_continuous = st.gamma
 
-    n_params = 2
-
     distribution_support = (np.nextafter(0, 1), np.inf)
+
+    n_params = 2
     parameter_support = {
         0: (np.nextafter(0, 1), np.inf),
         1: (np.nextafter(0, 1), np.inf),
     }
+    # Theta columns do not map 1:1 to scipy parameters, so we have to overload theta_to_scipy_params
+    scipy_parameters = {"a": 0, "loc": 0, "scale": 0}
 
     def theta_to_params(self, theta: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
         mu = theta[:, 0]
         sigma = theta[:, 1]
         return mu, sigma
 
-    @staticmethod
-    def gamlss_to_scipy(
-        mu: np.ndarray, sigma: np.ndarray
-    ) -> Tuple[np.ndarray, int, np.ndarray]:
+    def theta_to_scipy_params(self, theta: np.ndarray) -> dict:
         """Map GAMLSS Parameters to scipy parameters.
 
         Args:
-            mu (np.ndarray): mu parameter
-            sigma (np.ndarray): sigma parameter
+            theta (np.ndarray): parameters
 
         Returns:
-            tuple: Tuple of (alpha, loc, scale) for scipy.stats.gamma(alpha, loc, scale)
+            dict: Dict of (a, loc, scale) for scipy.stats.gamma(a, loc, scale)
         """
-        alpha = 1 / sigma**2
+        mu = theta[:, 0]
+        sigma = theta[:, 1]
         beta = 1 / (sigma**2 * mu)
-        loc = 0
-        scale = 1 / beta
-        return alpha, loc, scale
+        params = {"a": 1 / sigma**2, "loc": 0, "scale": 1 / beta}
+        return params
 
     def dl1_dp1(self, y: np.ndarray, theta: np.ndarray, param: int = 0) -> np.ndarray:
         self._validate_dln_dpn_inputs(y, theta, param)
@@ -142,21 +140,17 @@ def initial_values(
             return np.ones_like(y)
 
     def cdf(self, y: np.ndarray, theta: np.ndarray) -> np.ndarray:
-        mu, sigma = self.theta_to_params(theta)
-        return self.scipy_dist(*self.gamlss_to_scipy(mu, sigma)).cdf(y)
+        return self.scipy_dist(**self.theta_to_scipy_params(theta)).cdf(y)
 
     def pdf(self, y: np.ndarray, theta: np.ndarray) -> np.ndarray:
-        mu, sigma = self.theta_to_params(theta)
-        return self.scipy_dist(*self.gamlss_to_scipy(mu, sigma)).pdf(y)
+        return self.scipy_dist(**self.theta_to_scipy_params(theta)).pdf(y)
 
     def ppf(self, q: np.ndarray, theta: np.ndarray) -> np.ndarray:
-        mu, sigma = self.theta_to_params(theta)
-        return self.scipy_dist(*self.gamlss_to_scipy(mu, sigma)).ppf(q)
+        return self.scipy_dist(**self.theta_to_scipy_params(theta)).ppf(q)
 
     def rvs(self, size: int, theta: np.ndarray) -> np.ndarray:
-        mu, sigma = self.theta_to_params(theta)
         return (
-            self.scipy_dist(*self.gamlss_to_scipy(mu, sigma))
+            self.scipy_dist(**self.theta_to_scipy_params(theta))
             .rvs((size, theta.shape[0]))
             .T
         )
diff --git a/src/rolch/distributions/johnsonsu.py b/src/rolch/distributions/johnsonsu.py
@@ -35,6 +35,7 @@ def __init__(
             self.skew_link,
             self.tail_link,
         ]
+        self.scipy_dist: st.rv_continuous = st.johnsonsu
 
     n_params = 4
 
@@ -45,6 +46,7 @@ def __init__(
         2: (-np.inf, np.inf),
         3: (np.nextafter(0, 1), np.inf),
     }
+    scipy_parameters = {"loc": 0, "scale": 1, "a": 2, "b": 3}
 
     def theta_to_params(
         self, theta: np.ndarray
@@ -218,41 +220,17 @@ def initial_values(
             return np.full_like(y, 10)
 
     def cdf(self, y: np.ndarray, theta: np.ndarray) -> np.ndarray:
-        mu, sigma, nu, tau = self.theta_to_params(theta)
-        return st.johnsonsu(
-            loc=mu,
-            scale=sigma,
-            a=nu,
-            b=tau,
-        ).cdf(y)
+        return self.scipy_dist(**self.theta_to_scipy_params(theta)).cdf(y)
 
     def pdf(self, y: np.ndarray, theta: np.ndarray) -> np.ndarray:
-        mu, sigma, nu, tau = self.theta_to_params(theta)
-        return st.johnsonsu(
-            loc=mu,
-            scale=sigma,
-            a=nu,
-            b=tau,
-        ).pdf(y)
+        return self.scipy_dist(**self.theta_to_scipy_params(theta)).pdf(y)
 
     def ppf(self, q: np.ndarray, theta: np.ndarray) -> np.ndarray:
-        mu, sigma, nu, tau = self.theta_to_params(theta)
-        return st.johnsonsu(
-            loc=mu,
-            scale=sigma,
-            a=nu,
-            b=tau,
-        ).ppf(q)
+        return self.scipy_dist(**self.theta_to_scipy_params(theta)).ppf(q)
 
     def rvs(self, size: int, theta: np.ndarray) -> np.ndarray:
-        mu, sigma, nu, tau = self.theta_to_params(theta)
         return (
-            st.johnsonsu(
-                loc=mu,
-                scale=sigma,
-                a=nu,
-                b=tau,
-            )
+            self.scipy_dist(**self.theta_to_scipy_params(theta))
             .rvs((size, theta.shape[0]))
             .T
         )
diff --git a/src/rolch/distributions/normal.py b/src/rolch/distributions/normal.py
@@ -18,11 +18,12 @@ def __init__(
         self.loc_link: LinkFunction = loc_link
         self.scale_link: LinkFunction = scale_link
         self.links: list[LinkFunction] = [self.loc_link, self.scale_link]
-
-    n_params = 2
+        self.scipy_dist: st.rv_continuous = st.norm
 
     distribution_support = (-np.inf, np.inf)
+    n_params = 2
     parameter_support = {0: (-np.inf, np.inf), 1: (np.nextafter(0, 1), np.inf)}
+    scipy_parameters = {"loc": 0, "scale": 1}
 
     def theta_to_params(self, theta: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
         mu = theta[:, 0]
@@ -78,17 +79,17 @@ def initial_values(
             return np.repeat(np.std(y, axis=axis), y.shape[0])
 
     def cdf(self, y: np.ndarray, theta: np.ndarray) -> np.ndarray:
-        mu, sigma = self.theta_to_params(theta)
-        return st.norm(mu, sigma).cdf(y)
+        return self.scipy_dist(**self.theta_to_scipy_params(theta)).cdf(y)
 
     def pdf(self, y: np.ndarray, theta: np.ndarray) -> np.ndarray:
-        mu, sigma = self.theta_to_params(theta)
-        return st.norm(mu, sigma).pdf(y)
+        return self.scipy_dist(**self.theta_to_scipy_params(theta)).pdf(y)
 
     def ppf(self, q: np.ndarray, theta: np.ndarray) -> np.ndarray:
-        mu, sigma = self.theta_to_params(theta)
-        return st.norm(mu, sigma).ppf(q)
+        return self.scipy_dist(**self.theta_to_scipy_params(theta)).ppf(q)
 
     def rvs(self, size: int, theta: np.ndarray) -> np.ndarray:
-        mu, sigma = self.theta_to_params(theta)
-        return st.norm(mu, sigma).rvs((size, theta.shape[0])).T
+        return (
+            self.scipy_dist(**self.theta_to_scipy_params(theta))
+            .rvs((size, theta.shape[0]))
+            .T
+        )
diff --git a/src/rolch/distributions/studentt.py b/src/rolch/distributions/studentt.py
@@ -25,14 +25,17 @@ def __init__(
             self.scale_link,
             self.tail_link,
         ]
+        self.scipy_dist: st.rv_continuous = st.t
 
-    n_params: int = 3
     distribution_support = (-np.inf, np.inf)
+
+    n_params: int = 3
     parameter_support = {
         0: (-np.inf, np.inf),
         1: (np.nextafter(0, 1), np.inf),
         2: (np.nextafter(0, 1), np.inf),
     }
+    scipy_parameters = {"loc": 0, "scale": 1, "df": 2}
 
     def theta_to_params(
         self, theta: np.ndarray
@@ -135,17 +138,17 @@ def initial_values(
             return np.full_like(y, 10)
 
     def cdf(self, y: np.ndarray, theta: np.ndarray) -> np.ndarray:
-        mu, sigma, nu = self.theta_to_params(theta)
-        return st.t(nu, mu, sigma).cdf(y)
+        return self.scipy_dist(**self.theta_to_scipy_params(theta)).cdf(y)
 
     def pdf(self, y: np.ndarray, theta: np.ndarray) -> np.ndarray:
-        mu, sigma, nu = self.theta_to_params(theta)
-        return st.t(nu, mu, sigma).pdf(y)
+        return self.scipy_dist(**self.theta_to_scipy_params(theta)).pdf(y)
 
     def ppf(self, q: np.ndarray, theta: np.ndarray) -> np.ndarray:
-        mu, sigma, nu = self.theta_to_params(theta)
-        return st.t(nu, mu, sigma).ppf(q)
+        return self.scipy_dist(**self.theta_to_scipy_params(theta)).ppf(q)
 
     def rvs(self, size: int, theta: np.ndarray) -> np.ndarray:
-        mu, sigma, nu = self.theta_to_params(theta)
-        return st.t(nu, mu, sigma).rvs((size, theta.shape[0])).T
+        return (
+            self.scipy_dist(**self.theta_to_scipy_params(theta))
+            .rvs((size, theta.shape[0]))
+            .T
+        )