This repository has been archived by the owner on Nov 8, 2024. It is now read-only.

Commit
Fix MvNormal and truncated (#54)
storopoli authored Jul 24, 2022
1 parent 1d549bf commit 18e9edc
Showing 4 changed files with 44 additions and 25 deletions.
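The two recurring API updates this commit applies can be sketched in isolation. This is a hypothetical snippet, not part of the diff; it assumes a recent Distributions.jl (≥ 0.25, where the keyword form of `truncated` and the `σ^2 * I` covariance are the supported spellings):

```julia
using Distributions, LinearAlgebra

# 1. `truncated` bounds: the keyword form replaces the positional `0, Inf` form.
old_τ = truncated(Cauchy(0, 2), 0, Inf)
new_τ = truncated(Cauchy(0, 2); lower=0)
logpdf(old_τ, 1.0) ≈ logpdf(new_τ, 1.0)  # same distribution, newer API

# 2. `MvNormal` covariance: pass an explicit covariance `σ^2 * I`
#    instead of the older scalar/vector-of-standard-deviations methods.
μ, σ = [0.0, 1.0], 2.0
d = MvNormal(μ, σ^2 * I)  # isotropic covariance of matching size
rand(d)                   # a 2-element draw
```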
38 changes: 24 additions & 14 deletions _literate/10_multilevel_models.jl
@@ -102,12 +102,12 @@ setprogress!(false) # hide
σ ~ Exponential(1 / std(y)) # residual SD
#prior for variance of random intercepts
#usually requires thoughtful specification
τ ~ truncated(Cauchy(0, 2), 0, Inf) # group-level SDs intercepts
τ ~ truncated(Cauchy(0, 2); lower=0) # group-level SDs intercepts
αⱼ ~ filldist(Normal(0, τ), n_gr) # group-level intercepts

#likelihood
ŷ = α .+ X * β .+ αⱼ[idx]
y ~ MvNormal(ŷ, σ)
y ~ MvNormal(ŷ, σ^2 * I)
end;

# ### Random-Slope Model
@@ -137,16 +137,16 @@ end;

@model function varying_slope(X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2))
#priors
α ~ Normal(mean(y), 2.5 * std(y)) # population-level intercept
σ ~ Exponential(1 / std(y)) # residual SD
α ~ Normal(mean(y), 2.5 * std(y)) # population-level intercept
σ ~ Exponential(1 / std(y)) # residual SD
#prior for variance of random slopes
#usually requires thoughtful specification
τ ~ filldist(truncated(Cauchy(0, 2), 0, Inf), n_gr) # group-level slopes SDs
βⱼ ~ filldist(Normal(0, 1), predictors, n_gr) # group-level standard normal slopes
τ ~ filldist(truncated(Cauchy(0, 2); lower=0), n_gr) # group-level slopes SDs
βⱼ ~ filldist(Normal(0, 1), predictors, n_gr) # group-level standard normal slopes

#likelihood
ŷ = α .+ X * βⱼ * τ
y ~ MvNormal(ŷ, σ)
y ~ MvNormal(ŷ, σ^2 * I)
end;

# ### Random-Intercept-Slope Model
@@ -177,20 +177,30 @@ end;

@model function varying_intercept_slope(X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2))
#priors
α ~ Normal(mean(y), 2.5 * std(y)) # population-level intercept
σ ~ Exponential(1 / std(y)) # residual SD
α ~ Normal(mean(y), 2.5 * std(y)) # population-level intercept
σ ~ Exponential(1 / std(y)) # residual SD
#prior for variance of random intercepts and slopes
#usually requires thoughtful specification
τₐ ~ truncated(Cauchy(0, 2), 0, Inf) # group-level SDs intercepts
τᵦ ~ filldist(truncated(Cauchy(0, 2), 0, Inf), n_gr) # group-level slopes SDs
αⱼ ~ filldist(Normal(0, τₐ), n_gr) # group-level intercepts
βⱼ ~ filldist(Normal(0, 1), predictors, n_gr) # group-level standard normal slopes
τₐ ~ truncated(Cauchy(0, 2); lower=0) # group-level SDs intercepts
τᵦ ~ filldist(truncated(Cauchy(0, 2); lower=0), n_gr) # group-level slopes SDs
αⱼ ~ filldist(Normal(0, τₐ), n_gr) # group-level intercepts
βⱼ ~ filldist(Normal(0, 1), predictors, n_gr) # group-level standard normal slopes

#likelihood
ŷ = α .+ αⱼ[idx] .+ X * βⱼ * τᵦ
y ~ MvNormal(ŷ, σ)
y ~ MvNormal(ŷ, σ^2 * I)
end;

# In all of the models,
# we are using the `MvNormal` construction in which we specify both
# a vector of means (first positional argument)
# and a covariance matrix (second positional argument).
# Regarding the covariance matrix `σ^2 * I`,
# it takes the model's error `σ`, here parameterized as a standard deviation,
# squares it to produce a variance parameterization,
# and multiplies it by `I`, the identity operator from Julia's standard
# `LinearAlgebra` module, which represents an identity matrix of any size.
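A minimal check (my own sketch, not from the diff) that `MvNormal(ŷ, σ^2 * I)` is just independent normal noise around each mean:

```julia
using Distributions, LinearAlgebra

ŷ = [1.0, 2.0, 3.0]  # example mean vector
σ = 0.5              # residual standard deviation

d_iso = MvNormal(ŷ, σ^2 * I)                  # covariance: σ² on the diagonal
d_diag = MvNormal(ŷ, Diagonal(fill(σ^2, 3)))  # same covariance, spelled out

y = [1.1, 1.9, 3.2]
logpdf(d_iso, y) ≈ logpdf(d_diag, y)               # identical densities
logpdf(d_iso, y) ≈ sum(logpdf.(Normal.(ŷ, σ), y))  # iid-normal equivalence
```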

# ## Example - Cheese Ratings

# For our example, I will use a famous dataset called `cheese` (Boatwright, McCulloch & Rossi, 1999), which is data from
14 changes: 7 additions & 7 deletions _literate/11_Turing_tricks.jl
@@ -56,7 +56,7 @@ setprogress!(false) # hide
σ ~ Exponential(1)

#likelihood
y ~ MvNormal(α .+ X * β, σ)
y ~ MvNormal(α .+ X * β, σ^2 * I)
end;

using DataFrames, CSV, HTTP
@@ -184,12 +184,12 @@ chain_ncp_funnel = sample(ncp_funnel(), NUTS(), MCMCThreads(), 2_000, 4)
σ ~ Exponential(1 / std(y)) # residual SD
#prior for variance of random intercepts
#usually requires thoughtful specification
τ ~ truncated(Cauchy(0, 2), 0, Inf) # group-level SDs intercepts
τ ~ truncated(Cauchy(0, 2); lower=0) # group-level SDs intercepts
αⱼ ~ filldist(Normal(0, τ), n_gr) # CP group-level intercepts

#likelihood
ŷ = α .+ X * β .+ αⱼ[idx]
y ~ MvNormal(ŷ, σ)
ŷ = α .+ X * β .+ αⱼ[idx]
y ~ MvNormal(ŷ, σ^2 * I)
end;

# To perform a Non-Centered Parametrization (NCP) in this model we do as following:
@@ -202,12 +202,12 @@ end;

#prior for variance of random intercepts
#usually requires thoughtful specification
τ ~ truncated(Cauchy(0, 2), 0, Inf) # group-level SDs intercepts
τ ~ truncated(Cauchy(0, 2); lower=0) # group-level SDs intercepts
zⱼ ~ filldist(Normal(0, 1), n_gr) # NCP group-level intercepts

#likelihood
ŷ = α .+ X * β .+ zⱼ[idx] .* τ
y ~ MvNormal(ŷ, σ)
ŷ = α .+ X * β .+ zⱼ[idx] .* τ
y ~ MvNormal(ŷ, σ^2 * I)
end;

# Here we are using a NCP with the `zⱼ`s following a standard normal and we reconstruct the
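The scaling step can be verified outside Turing — a sketch with assumed values, showing that `z .* τ` with standard-normal `z` matches sampling from `Normal(0, τ)` directly:

```julia
using Distributions, Random, Statistics

Random.seed!(123)
τ = 2.5                                   # assumed group-level SD
z = rand(Normal(0, 1), 100_000)           # NCP draws
noncentered = z .* τ                      # reconstructed group effects
centered = rand(Normal(0, τ), 100_000)    # CP draws for comparison

# Both samples should have a standard deviation close to τ.
std(noncentered), std(centered)
```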
6 changes: 3 additions & 3 deletions _literate/12_epi_models.jl
@@ -158,9 +158,9 @@ setprogress!(false) # hide
l = length(infected)

#priors
β ~ TruncatedNormal(2, 1, 1e-4, 10) # using 10 instead of Inf because numerical issues arose
γ ~ TruncatedNormal(0.4, 0.5, 1e-4, 10) # using 10 instead of Inf because numerical issues arose
ϕ⁻ ~ truncated(Exponential(5), 0, 1e5)
β ~ TruncatedNormal(2, 1, 1e-4, 10) # using 10 because numerical issues arose
γ ~ TruncatedNormal(0.4, 0.5, 1e-4, 10) # using 10 because numerical issues arose
ϕ⁻ ~ truncated(Exponential(5); lower=0, upper=1e5)
ϕ = 1.0 / ϕ⁻

#ODE Stuff
11 changes: 10 additions & 1 deletion _literate/6_linear_reg.jl
@@ -84,7 +84,7 @@ setprogress!(false) # hide
σ ~ Exponential(1)

#likelihood
y ~ MvNormal(α .+ X * β, σ)
y ~ MvNormal(α .+ X * β, σ^2 * I)
end;

# Here I am specifying very weakly informative priors:
@@ -93,6 +93,15 @@ end;
# * $\boldsymbol{\beta} \sim \text{Student-}t(0,1,3)$ -- The predictors all have a prior of a Student-$t$ distribution centered on 0 with scale 1 and degrees of freedom $\nu = 3$. That wide-tailed $t$ distribution will cover all plausible values for our coefficients. Remember that the Student-$t$ also has support over the whole real line, $(-\infty, +\infty)$. Also, `filldist()` is a convenient Turing function that takes any univariate or multivariate distribution and returns another distribution that repeats the input distribution.
# * $\sigma \sim \text{Exponential}(1)$ -- A wide-tailed-positive-only distribution perfectly suited for our model's error.

# Also, we are using the `MvNormal` construction in which we specify both
# a vector of means (first positional argument)
# and a covariance matrix (second positional argument).
# Regarding the covariance matrix `σ^2 * I`,
# it takes the model's error `σ`, here parameterized as a standard deviation,
# squares it to produce a variance parameterization,
# and multiplies it by `I`, the identity operator from Julia's standard
# `LinearAlgebra` module, which represents an identity matrix of any size.
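To see how `I` adapts to any size, here is a small `LinearAlgebra` sketch (my own illustration, not part of the tutorial):

```julia
using LinearAlgebra

σ = 0.5
Σ = σ^2 * I                  # a lazy UniformScaling, size-agnostic
Σ * ones(3) == fill(σ^2, 3)  # acts as a 3×3 diagonal matrix here
Matrix(Σ, 2, 2)              # or materialize at any size: σ² on the diagonal
```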

# ## Example - Children's IQ Score

# For our example, I will use a famous dataset called `kidiq` (Gelman & Hill, 2007), which is data from a survey of adult American women and their respective children. Dated from 2007, it has 434 observations and 4 variables: