Merge remote-tracking branch 'upstream/main'

fonnesbeck · Sep 11, 2024 · 7c214ef · 7c214ef
2 parents fae292f + 2d9fec2
commit 7c214ef
Show file tree

Hide file tree

Showing 35 changed files with 13,634 additions and 6,010 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -3,11 +3,11 @@ ci:
 
 repos:
 - repo: https://github.com/psf/black
-  rev: 23.7.0
+  rev: 24.4.2
   hooks:
     - id: black-jupyter
 - repo: https://github.com/nbQA-dev/nbQA
-  rev: 1.7.0
+  rev: 1.8.5
   hooks:
     - id: nbqa-isort
       additional_dependencies: [isort==5.6.4]
@@ -99,7 +99,7 @@ repos:
       language: pygrep
       types_or: [markdown, rst, jupyter]
 - repo: https://github.com/mwouts/jupytext
-  rev: v1.15.1
+  rev: v1.16.3
   hooks:
   - id: jupytext
     files: ^examples/.+\.ipynb$

diff --git a/examples/data/GMB_elv_msk.tif b/examples/data/GMB_elv_msk.tif
diff --git a/examples/data/gambia_dataset.csv b/examples/data/gambia_dataset.csv
diff --git a/examples/diagnostics_and_criticism/model_averaging.ipynb b/examples/diagnostics_and_criticism/model_averaging.ipynb
diff --git a/examples/diagnostics_and_criticism/model_averaging.myst.md b/examples/diagnostics_and_criticism/model_averaging.myst.md
diff --git a/examples/fundamentals/data_container.ipynb b/examples/fundamentals/data_container.ipynb
diff --git a/examples/fundamentals/data_container.myst.md b/examples/fundamentals/data_container.myst.md
diff --git a/examples/gaussian_processes/HSGP-Advanced.ipynb b/examples/gaussian_processes/HSGP-Advanced.ipynb
diff --git a/examples/gaussian_processes/HSGP-Advanced.myst.md b/examples/gaussian_processes/HSGP-Advanced.myst.md
@@ -5,9 +5,9 @@ jupytext:
     format_name: myst
     format_version: 0.13
 kernelspec:
-  display_name: pymc-examples
+  display_name: pymc
   language: python
-  name: pymc-examples
+  name: python3
 ---
 
 (hsgp-advanced)=
@@ -44,6 +44,7 @@ A secondary goal of this implementation is flexibility via an accessible impleme
 import arviz as az
 import matplotlib.pyplot as plt
 import numpy as np
+import preliz as pz
 import pymc as pm
 import pytensor.tensor as pt
 ```
@@ -140,7 +141,9 @@ cov_mu = cov_trend + cov_short
 # Define the delta GPs
 n_gps = 10
 eta_delta_true = 3
-ell_delta_true = pm.draw(pm.Lognormal.dist(mu=np.log(ell_mu_short_true), sigma=0.5), draws=n_gps)
+ell_delta_true = pm.draw(
+    pm.Lognormal.dist(mu=np.log(ell_mu_short_true), sigma=0.5), draws=n_gps, random_seed=rng
+)
 
 cov_deltas = [
     eta_delta_true**2 * pm.gp.cov.Matern52(input_dim=1, ls=ell_i) for ell_i in ell_delta_true
@@ -166,12 +169,14 @@ def generate_gp_samples(x, cov_mu, cov_deltas, noise_dist, rng):
     """
     n = len(x)
     # One draw from the mean GP
-    f_mu_true = pm.draw(pm.MvNormal.dist(mu=np.zeros(n), cov=cov_mu(x[:, None])))
+    f_mu_true = pm.draw(pm.MvNormal.dist(mu=np.zeros(n), cov=cov_mu(x[:, None])), random_seed=rng)
 
     # Draws from the delta GPs
     f_deltas = []
     for cov_delta in cov_deltas:
-        f_deltas.append(pm.draw(pm.MvNormal.dist(mu=np.zeros(n), cov=cov_delta(x[:, None]))))
+        f_deltas.append(
+            pm.draw(pm.MvNormal.dist(mu=np.zeros(n), cov=cov_delta(x[:, None])), random_seed=rng)
+        )
     f_delta = np.vstack(f_deltas)
 
     # The hierarchical GP
@@ -435,10 +440,9 @@ with pm.Model(coords=coords) as model:
     ell_mu_short = pm.Deterministic("ell_mu_short", pt.softplus(log_ell_mu_short))
 
     eta_mu_trend = pm.Gamma("eta_mu_trend", mu=3.5, sigma=1)
-    ell_mu_trend_params = pm.find_constrained_prior(
-        pm.InverseGamma, lower=5, upper=12, mass=0.95, init_guess={"mu": 9.0, "sigma": 3.0}
+    ell_mu_trend = pz.maxent(pz.InverseGamma(), lower=5, upper=12, mass=0.95, plot=False).to_pymc(
+        "ell_mu_trend"
     )
-    ell_mu_trend = pm.InverseGamma("ell_mu_trend", **ell_mu_trend_params)
 
     ## Prior for the offsets
     log_ell_delta_offset = pm.ZeroSumNormal("log_ell_delta_offset", dims="gp_ix")
@@ -473,7 +477,7 @@ Now, what do these priors mean? Good question. As always, it's crucial to do **p
 
 ```{code-cell} ipython3
 with model:
-    idata = pm.sample_prior_predictive()
+    idata = pm.sample_prior_predictive(random_seed=rng)
 ```
 
 ```{code-cell} ipython3
@@ -564,7 +568,7 @@ Once we're satisfied with our priors, which is the case here, we can... sample t
 
 ```{code-cell} ipython3
 with model:
-    idata.extend(pm.sample(nuts_sampler="numpyro", target_accept=0.9))
+    idata.extend(pm.sample(nuts_sampler="numpyro", target_accept=0.9, random_seed=rng))
 ```
 
 ```{code-cell} ipython3
@@ -669,6 +673,7 @@ with model:
             var_names=["f_mu", "f"],
             predictions=True,
             compile_kwargs={"mode": "NUMBA"},
+            random_seed=rng,
         ),
     )
 ```
@@ -863,7 +868,11 @@ cov_t = pm.gp.cov.Matern52(input_dim=1, ls=ell_t_true)
 Kt = cov_t(t[:, None])
 
 K = pt.slinalg.kron(Kx, Kt)
-f_true = pm.draw(pm.MvNormal.dist(mu=np.zeros(n_gps * n_t), cov=K)).reshape(n_gps, n_t).T
+f_true = (
+    pm.draw(pm.MvNormal.dist(mu=np.zeros(n_gps * n_t), cov=K), random_seed=rng)
+    .reshape(n_gps, n_t)
+    .T
+)
 
 # Additive gaussian noise
 sigma_noise = 0.5
@@ -947,17 +956,11 @@ with pm.Model() as model:
     Xs_x = Xx - xx_center
 
     ## covariance on time GP
-    ell_t_params = pm.find_constrained_prior(
-        pm.Lognormal, lower=0.5, upper=4.0, mass=0.95, init_guess={"mu": 1.0, "sigma": 1.0}
-    )
-    ell_t = pm.Lognormal("ell_t", **ell_t_params)
+    ell_t = pz.maxent(pz.LogNormal(), lower=0.5, upper=4.0, mass=0.95, plot=False).to_pymc("ell_t")
     cov_t = pm.gp.cov.Matern52(1, ls=ell_t)
 
     ## covariance on space GP
-    ell_x_params = pm.find_constrained_prior(
-        pm.Lognormal, lower=0.5, upper=4.0, mass=0.95, init_guess={"mu": 1.0, "sigma": 1.0}
-    )
-    ell_x = pm.Lognormal("ell_x", **ell_x_params)
+    ell_x = pz.maxent(pz.LogNormal(), lower=0.5, upper=4.0, mass=0.95, plot=False).to_pymc("ell_x")
     cov_x = pm.gp.cov.Matern52(1, ls=ell_x)
 
     ## Kronecker GP
@@ -981,7 +984,7 @@ pm.model_to_graphviz(model)
 
 ```{code-cell} ipython3
 with model:
-    idata = pm.sample_prior_predictive()
+    idata = pm.sample_prior_predictive(random_seed=rng)
 ```
 
 ```{code-cell} ipython3
@@ -1015,7 +1018,7 @@ axs[1].set(ylim=ylims, title=r"Prior GPs, $\pm 1 \sigma$ posterior intervals");
 
 ```{code-cell} ipython3
 with model:
-    idata.extend(pm.sample(nuts_sampler="numpyro"))
+    idata.extend(pm.sample(nuts_sampler="numpyro", random_seed=rng))
 ```
 
 ```{code-cell} ipython3
@@ -1075,6 +1078,7 @@ And isn't this beautiful?? Now go on, and HSGP-on!
 ## Authors
 
 * Created by [Bill Engels](https://github.com/bwengals), [Alexandre Andorra](https://github.com/AlexAndorra) and [Maxim Kochurov](https://github.com/ferrine) in 2024 ([pymc-examples#668](https://github.com/pymc-devs/pymc-examples/pull/668))
+* Use `pz.maxent` instead of `pm.find_constrained_prior`, and add random seed. [Osvaldo Martin](https://aloctavodia.github.io/). August 2024
 
 +++
 

diff --git a/examples/gaussian_processes/HSGP-Basic.ipynb b/examples/gaussian_processes/HSGP-Basic.ipynb
diff --git a/examples/gaussian_processes/HSGP-Basic.myst.md b/examples/gaussian_processes/HSGP-Basic.myst.md
@@ -5,9 +5,9 @@ jupytext:
     format_name: myst
     format_version: 0.13
 kernelspec:
-  display_name: pymc-examples
+  display_name: preliz
   language: python
-  name: pymc-examples
+  name: python3
 ---
 
 (hsgp)=
@@ -56,6 +56,7 @@ We'll use simulated data to motivate an overview of the usage of {class}`~pymc.g
 import arviz as az
 import matplotlib.pyplot as plt
 import numpy as np
+import preliz as pz
 import pymc as pm
 import pytensor.tensor as pt
 
@@ -114,42 +115,21 @@ ax.grid(False)
 
 +++
 
-First we use `pm.find_constrained_prior` to choose our prior for the lengthscale parameter.  We use a Lognormal to penalize very small lengthscales while having a heavy right tail.  When the signal from the GP is high relative to the noise, we are able to use more informative priors.
+First we use `pz.maxent` to choose our prior for the lengthscale parameter, maxent return the maximum entropy prior with the specified `mass` within the interval [`lower`, `upper`].  
 
-```{code-cell} ipython3
-ell_dist = pm.Lognormal
+We use a Lognormal to penalize very small lengthscales while having a heavy right tail.  When the signal from the GP is high relative to the noise, we are able to use more informative priors.
 
+```{code-cell} ipython3
 lower, upper = 0.5, 5.0
-ell_params = pm.find_constrained_prior(
-    ell_dist, lower=lower, upper=upper, mass=0.9, init_guess={"mu": 1.0, "sigma": 1.0}
+ell_dist, ax = pz.maxent(
+    pz.LogNormal(),
+    lower=lower,
+    upper=upper,
+    mass=0.9,
+    plot_kwargs={"support": (0, 7), "legend": None},
 )
 
-support = np.linspace(0, 7.0, 1000)
-logp = pm.logp(ell_dist.dist(**ell_params), support)
-p = np.exp(logp.eval())
-
-_, ax = plt.subplots()
-
-bulk_ix = (support >= lower) & (support <= upper)
-ax.fill_between(
-    support[bulk_ix],
-    np.zeros(sum(bulk_ix)),
-    p[bulk_ix],
-    color="slateblue",
-    alpha=0.3,
-    edgecolor="none",
-    label="90% of mass",
-)
-ax.plot(support, p, color="k", lw=2)
-
-ax.set(
-    xlabel="x",
-    ylabel="p(x)",
-    xlim=[-0.1, 7.0],
-    ylim=[0.0, 0.6],
-    title=r"Prior for the lengthscale, $\ell$",
-)
-ax.legend();
+ax.set_title(r"Prior for the lengthscale, $\ell$")
 ```
 
 There are a few things to note about the model code below:
@@ -158,7 +138,7 @@ There are a few things to note about the model code below:
 
 ```{code-cell} ipython3
 with pm.Model(coords={"basis_coeffs": np.arange(200), "obs_id": np.arange(len(y_obs))}) as model:
-    ell = ell_dist("ell", **ell_params)
+    ell = ell_dist.to_pymc("ell")
     eta = pm.Exponential("eta", scale=1.0)
     cov_func = eta**2 * pm.gp.cov.Matern52(input_dim=1, ls=ell)
 
@@ -174,7 +154,7 @@ with pm.Model(coords={"basis_coeffs": np.arange(200), "obs_id": np.arange(len(y_
     pm.Normal("y_obs", mu=f, sigma=sigma, observed=y_obs, dims="obs_id")
 
     idata = pm.sample()
-    idata.extend(pm.sample_posterior_predictive(idata))
+    idata.extend(pm.sample_posterior_predictive(idata, random_seed=rng))
 ```
 
 ```{code-cell} ipython3
@@ -579,7 +559,7 @@ with pm.Model() as model:
     sigma = pm.Exponential("sigma", scale=2.0)
     pm.Normal("y_obs", mu=mu, sigma=sigma, observed=y_tr, shape=X_gp.shape[0])
 
-    idata = pm.sample_prior_predictive()
+    idata = pm.sample_prior_predictive(random_seed=rng)
 
 pm.model_to_graphviz(model)
 ```
@@ -628,7 +608,7 @@ Now, let's sample the model and quickly check the results:
 
 ```{code-cell} ipython3
 with model:
-    idata.extend(pm.sample(nuts_sampler="numpyro"))
+    idata.extend(pm.sample(nuts_sampler="numpyro", random_seed=rng))
 ```
 
 ```{code-cell} ipython3
@@ -663,7 +643,9 @@ with model:
     pm.set_data({"X_gp": X[:, :2], "X_fe": X[:, 2]})
 
     idata_thinned = idata.sel(draw=slice(None, None, 10))
-    idata.extend(pm.sample_posterior_predictive(idata_thinned, var_names=["f", "mu"]))
+    idata.extend(
+        pm.sample_posterior_predictive(idata_thinned, var_names=["f", "mu"], random_seed=rng)
+    )
 ```
 
 ```{code-cell} ipython3
@@ -704,6 +686,7 @@ Sampling diagnostics all look good, and we can see that the underlying GP was in
 ## Authors
 
 * Created by [Bill Engels](https://github.com/bwengals) and [Alexandre Andorra](https://github.com/AlexAndorra) in 2024 ([pymc-examples#647](https://github.com/pymc-devs/pymc-examples/pull/647))
+* Use `pz.maxent` instead of `pm.find_constrained_prior`, and add random seed. [Osvaldo Martin](https://aloctavodia.github.io/). August 2024
 
 +++