diff --git a/_literate/10_multilevel_models.jl b/_literate/10_multilevel_models.jl
index 1fb4a304..355cb572 100644
--- a/_literate/10_multilevel_models.jl
+++ b/_literate/10_multilevel_models.jl
@@ -102,12 +102,12 @@ setprogress!(false) # hide
     σ ~ Exponential(1 / std(y)) # residual SD
     #prior for variance of random intercepts
     #usually requires thoughtful specification
-    τ ~ truncated(Cauchy(0, 2), 0, Inf) # group-level SDs intercepts
+    τ ~ truncated(Cauchy(0, 2); lower=0) # group-level SDs intercepts
     αⱼ ~ filldist(Normal(0, τ), n_gr) # group-level intercepts

     #likelihood
     ŷ = α .+ X * β .+ αⱼ[idx]
-    y ~ MvNormal(ŷ, σ)
+    y ~ MvNormal(ŷ, σ^2 * I)
 end;

 # ### Random-Slope Model

@@ -137,16 +137,16 @@ end;
 @model function varying_slope(X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2))
     #priors
-    α ~ Normal(mean(y), 2.5 * std(y)) # population-level intercept
-    σ ~ Exponential(1 / std(y)) # residual SD
+    α ~ Normal(mean(y), 2.5 * std(y))  # population-level intercept
+    σ ~ Exponential(1 / std(y))        # residual SD
     #prior for variance of random slopes
     #usually requires thoughtful specification
-    τ ~ filldist(truncated(Cauchy(0, 2), 0, Inf), n_gr) # group-level slopes SDs
-    βⱼ ~ filldist(Normal(0, 1), predictors, n_gr) # group-level standard normal slopes
+    τ ~ filldist(truncated(Cauchy(0, 2); lower=0), n_gr) # group-level slopes SDs
+    βⱼ ~ filldist(Normal(0, 1), predictors, n_gr)        # group-level standard normal slopes

     #likelihood
     ŷ = α .+ X * βⱼ * τ
-    y ~ MvNormal(ŷ, σ)
+    y ~ MvNormal(ŷ, σ^2 * I)
 end;

 # ### Random-Intercept-Slope Model

@@ -177,20 +177,30 @@ end;
 @model function varying_intercept_slope(X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2))
     #priors
-    α ~ Normal(mean(y), 2.5 * std(y)) # population-level intercept
-    σ ~ Exponential(1 / std(y)) # residual SD
+    α ~ Normal(mean(y), 2.5 * std(y))  # population-level intercept
+    σ ~ Exponential(1 / std(y))        # residual SD
     #prior for variance of random intercepts and slopes
     #usually requires thoughtful specification
-    τₐ ~ truncated(Cauchy(0, 2), 0, Inf) # group-level SDs intercepts
-    τᵦ ~ filldist(truncated(Cauchy(0, 2), 0, Inf), n_gr) # group-level slopes SDs
-    αⱼ ~ filldist(Normal(0, τₐ), n_gr) # group-level intercepts
-    βⱼ ~ filldist(Normal(0, 1), predictors, n_gr) # group-level standard normal slopes
+    τₐ ~ truncated(Cauchy(0, 2); lower=0)                 # group-level SDs intercepts
+    τᵦ ~ filldist(truncated(Cauchy(0, 2); lower=0), n_gr) # group-level slopes SDs
+    αⱼ ~ filldist(Normal(0, τₐ), n_gr)                    # group-level intercepts
+    βⱼ ~ filldist(Normal(0, 1), predictors, n_gr)         # group-level standard normal slopes

     #likelihood
     ŷ = α .+ αⱼ[idx] .+ X * βⱼ * τᵦ
-    y ~ MvNormal(ŷ, σ)
+    y ~ MvNormal(ŷ, σ^2 * I)
 end;

+# In all of the models,
+# we are using the `MvNormal` construction where we specify both
+# a vector of means (first positional argument)
+# and a covariance matrix (second positional argument).
+# Regarding the covariance matrix `σ^2 * I`:
+# it takes the model's error `σ`, here parameterized as a standard deviation,
+# squares it to produce a variance parameterization,
+# and multiplies it by `I`, which comes from Julia's `LinearAlgebra` standard
+# library and represents an identity matrix of any size.
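+#
+# As a minimal sketch of that construction
+# (`σ_demo` is a hypothetical value; `LinearAlgebra` must be loaded for `I`):
+
+σ_demo = 0.5                     # hypothetical residual SD
+Matrix(σ_demo^2 * I, 3, 3)       # 3×3 diagonal covariance with σ² on the diagonal
+MvNormal(zeros(3), σ_demo^2 * I) # `I` adapts to the length of the mean vector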
+
 # ## Example - Cheese Ratings

 # For our example, I will use a famous dataset called `cheese` (Boatwright, McCulloch & Rossi, 1999), which is data from
diff --git a/_literate/11_Turing_tricks.jl b/_literate/11_Turing_tricks.jl
index 12810483..79bcd730 100644
--- a/_literate/11_Turing_tricks.jl
+++ b/_literate/11_Turing_tricks.jl
@@ -56,7 +56,7 @@ setprogress!(false) # hide
     σ ~ Exponential(1)

     #likelihood
-    y ~ MvNormal(α .+ X * β, σ)
+    y ~ MvNormal(α .+ X * β, σ^2 * I)
 end;

 using DataFrames, CSV, HTTP
@@ -184,12 +184,12 @@ chain_ncp_funnel = sample(ncp_funnel(), NUTS(), MCMCThreads(), 2_000, 4)
     σ ~ Exponential(1 / std(y)) # residual SD
     #prior for variance of random intercepts
     #usually requires thoughtful specification
-    τ ~ truncated(Cauchy(0, 2), 0, Inf) # group-level SDs intercepts
+    τ ~ truncated(Cauchy(0, 2); lower=0) # group-level SDs intercepts
     αⱼ ~ filldist(Normal(0, τ), n_gr) # CP group-level intercepts

     #likelihood
-    ŷ = α .+ X * β .+ αⱼ[idx]
-    y ~ MvNormal(ŷ, σ)
+    ŷ = α .+ X * β .+ αⱼ[idx]
+    y ~ MvNormal(ŷ, σ^2 * I)
 end;

 # To perform a Non-Centered Parametrization (NCP) in this model we do the following:
@@ -202,12 +202,12 @@ end;

     #prior for variance of random intercepts
     #usually requires thoughtful specification
-    τ ~ truncated(Cauchy(0, 2), 0, Inf) # group-level SDs intercepts
+    τ ~ truncated(Cauchy(0, 2); lower=0) # group-level SDs intercepts
     zⱼ ~ filldist(Normal(0, 1), n_gr) # NCP group-level intercepts

     #likelihood
-    ŷ = α .+ X * β .+ zⱼ[idx] .* τ
-    y ~ MvNormal(ŷ, σ)
+    ŷ = α .+ X * β .+ zⱼ[idx] .* τ
+    y ~ MvNormal(ŷ, σ^2 * I)
 end;

 # Here we are using an NCP with the `zⱼ`s following a standard normal and we reconstruct the
diff --git a/_literate/12_epi_models.jl b/_literate/12_epi_models.jl
index 2914c062..3bb0738b 100644
--- a/_literate/12_epi_models.jl
+++ b/_literate/12_epi_models.jl
@@ -158,9 +158,9 @@ setprogress!(false) # hide
     l = length(infected)

     #priors
-    β ~ TruncatedNormal(2, 1, 1e-4, 10) # using 10 instead of Inf because numerical issues arose
-    γ ~ TruncatedNormal(0.4, 0.5, 1e-4, 10) # using 10 instead of Inf because numerical issues arose
-    ϕ⁻ ~ truncated(Exponential(5), 0, 1e5)
+    β ~ TruncatedNormal(2, 1, 1e-4, 10)     # using 10 because numerical issues arose
+    γ ~ TruncatedNormal(0.4, 0.5, 1e-4, 10) # using 10 because numerical issues arose
+    ϕ⁻ ~ truncated(Exponential(5); lower=0, upper=1e5)
     ϕ = 1.0 / ϕ⁻

     #ODE Stuff
diff --git a/_literate/6_linear_reg.jl b/_literate/6_linear_reg.jl
index 4d376922..a710497e 100644
--- a/_literate/6_linear_reg.jl
+++ b/_literate/6_linear_reg.jl
@@ -84,7 +84,7 @@ setprogress!(false) # hide
     σ ~ Exponential(1)

     #likelihood
-    y ~ MvNormal(α .+ X * β, σ)
+    y ~ MvNormal(α .+ X * β, σ^2 * I)
 end;

 # Here I am specifying very weakly informative priors:
@@ -93,6 +93,15 @@ end;
 # * $\boldsymbol{\beta} \sim \text{Student-}t(0,1,3)$ -- The predictors all have a prior distribution of a Student-$t$ distribution centered on 0 with scale 1 and degrees of freedom $\nu = 3$. That wide-tailed $t$ distribution will cover all possible values for our coefficients. Remember that the Student-$t$ also has support over the whole real line $(-\infty, +\infty)$. Also, `filldist()` is a nice Turing function which takes any univariate or multivariate distribution and returns another distribution that repeats the input distribution (see the short sketch after this list).
 # * $\sigma \sim \text{Exponential}(1)$ -- A wide-tailed, positive-only distribution perfectly suited for our model's error.
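+# As a short sketch of what `filldist()` does
+# (`Turing` re-exports `TDist`; the size 2 is a hypothetical number of coefficients):
+
+d = filldist(TDist(3), 2) # product of 2 iid Student-t(ν = 3) priors
+rand(d)                   # a 2-element vector, one draw per coefficient
+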
+# Also, we are using the `MvNormal` construction where we specify both
+# a vector of means (first positional argument)
+# and a covariance matrix (second positional argument).
+# Regarding the covariance matrix `σ^2 * I`:
+# it takes the model's error `σ`, here parameterized as a standard deviation,
+# squares it to produce a variance parameterization,
+# and multiplies it by `I`, which comes from Julia's `LinearAlgebra` standard
+# library and represents an identity matrix of any size.
+

 # ## Example - Children's IQ Score

 # For our example, I will use a famous dataset called `kidiq` (Gelman & Hill, 2007), which is data from a survey of adult American women and their respective children. Dating from 2007, it has 434 observations and 4 variables: