-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Redesign of MCSE #63
Redesign of MCSE #63
Changes from 37 commits
2d82222
e4b067b
34e0221
6839cd7
7dbda2d
b93ce5b
790a99b
9a1d3d5
c0d5a94
cf908af
93d121e
fe99356
d12648b
465afac
7dac85e
9407369
f95a066
88b6c41
899711e
01b8dbc
60d6441
2441bcb
8e3b06a
e7ca85a
2af67e9
da4ed63
b7cd495
4d55716
d9f6734
1c48266
f072b9e
bb47887
7f61907
a03cc2a
bac8a3c
652b86f
8dbae84
d9aff61
cced4be
ce9d427
787a05f
34f3771
d10740a
cf09e4e
39d74a9
a575fc8
b4eea8d
3738347
69fe0a1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ BDAESSMethod | |
|
||
```@docs | ||
mcse | ||
mcse_sbm | ||
``` | ||
|
||
## R⋆ diagnostic | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,72 +1,142 @@ | ||
const normcdf1 = 0.8413447460685429 # StatsFuns.normcdf(1), i.e. P(Z ≤ 1) for Z ~ Normal(0, 1)
const normcdfn1 = 0.15865525393145705 # StatsFuns.normcdf(-1), i.e. P(Z ≤ -1); used by _mcse_quantile
|
||
""" | ||
mcse(x::AbstractVector{<:Real}; method::Symbol=:imse, kwargs...) | ||
mcse(estimator, samples::AbstractArray{<:Union{Missing,Real}}; kwargs...) | ||
|
||
Estimate the Monte Carlo standard errors (MCSE) of the `estimator` applied to `samples` of | ||
shape `(draws, chains, parameters)`. | ||
sethaxen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
See also: [`ess_rhat`](@ref) | ||
|
||
Compute the Monte Carlo standard error (MCSE) of samples `x`. | ||
The optional argument `method` describes how the errors are estimated. Possible options are: | ||
## Estimators | ||
|
||
- `:bm` for batch means [^Glynn1991] | ||
- `:imse` initial monotone sequence estimator [^Geyer1992] | ||
- `:ipse` initial positive sequence estimator [^Geyer1992] | ||
`estimator` must accept a vector of the same eltype as `samples` and return a real estimate. | ||
sethaxen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
[^Glynn1991]: Glynn, P. W., & Whitt, W. (1991). Estimating the asymptotic variance with batch means. Operations Research Letters, 10(8), 431-435. | ||
For the following estimators, the effective sample size [`ess_rhat`](@ref) and an estimate | ||
of the asymptotic variance are used to compute the MCSE, and `kwargs` are forwarded to | ||
`ess_rhat`: | ||
- `Statistics.mean` | ||
- `Statistics.median` | ||
- `Statistics.std` | ||
- `Base.Fix2(Statistics.quantile, p::Real)` | ||
|
||
[^Geyer1992]: Geyer, C. J. (1992). Practical Markov Chain Monte Carlo. Statistical Science, 473-483. | ||
For an arbitrary estimator, the subsampling bootstrap method [`mcse_sbm`](@ref) is used, and | ||
sethaxen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
`kwargs` are forwarded to that function. | ||
""" | ||
function mcse(x::AbstractVector{<:Real}; method::Symbol=:imse, kwargs...) | ||
return if method === :bm | ||
mcse_bm(x; kwargs...) | ||
elseif method === :imse | ||
mcse_imse(x) | ||
elseif method === :ipse | ||
mcse_ipse(x) | ||
else | ||
throw(ArgumentError("unsupported MCSE method $method")) | ||
# Fallback for arbitrary estimators: delegate to the subsampling bootstrap method.
mcse(f, x::AbstractArray{<:Union{Missing,Real},3}; kwargs...) = mcse_sbm(f, x; kwargs...)

# MCSE of the sample mean: pooled standard deviation divided by √ESS, where the
# effective sample size is the one tailored to the mean.
function mcse(
    ::typeof(Statistics.mean), samples::AbstractArray{<:Union{Missing,Real},3}; kwargs...
)
    ess = ess_rhat(Statistics.mean, samples; kwargs...)[1]
    pooled_sd = dropdims(Statistics.std(samples; dims=(1, 2)); dims=(1, 2))
    return pooled_sd ./ sqrt.(ess)
end
function mcse(
    ::typeof(Statistics.std), samples::AbstractArray{<:Union{Missing,Real},3}; kwargs...
)
    # Centered, squared draws serve as an expectand proxy: their mean is the variance.
    centered_sq = (samples .- Statistics.mean(samples; dims=(1, 2))) .^ 2
    ess = ess_rhat(Statistics.mean, centered_sq; kwargs...)[1]
    # The asymptotic variance of the sample variance is Var[var] = E[μ₄] - E[var]²,
    # where μ₄ is the 4th central moment. By the delta method,
    # Var[std] = Var[var] / (4 E[var]) = (E[μ₄]/E[var] - E[var]) / 4.
    # See e.g. Chapter 3 of Van der Vaart, A. W. (2000). Asymptotic Statistics.
    mean_var = dropdims(Statistics.mean(centered_sq; dims=(1, 2)); dims=(1, 2))
    mean_moment4 = dropdims(Statistics.mean(abs2, centered_sq; dims=(1, 2)); dims=(1, 2))
    return @. sqrt((mean_moment4 / mean_var - mean_var) / ess) / 2
end
function mcse(
    f::Base.Fix2{typeof(Statistics.quantile),<:Real},
    samples::AbstractArray{<:Union{Missing,Real},3};
    kwargs...,
)
    p = f.x
    # ESS tailored to the quantile estimator.
    ess = ess_rhat(f, samples; kwargs...)[1]
    Tess = eltype(ess)
    # Output eltype: input eltype promoted through division by √ESS.
    Tout = promote_type(
        eltype(samples), typeof(oneunit(eltype(samples)) / sqrt(oneunit(Tess)))
    )
    out = similar(ess, Tout)
    for (i, draws_i, ess_i) in zip(eachindex(out), eachslice(samples; dims=3), ess)
        out[i] = _mcse_quantile(vec(draws_i), p, ess_i)
    end
    return out
end
function mcse(
    ::typeof(Statistics.median), samples::AbstractArray{<:Union{Missing,Real},3}; kwargs...
)
    # ESS tailored to the median estimator.
    ess = ess_rhat(Statistics.median, samples; kwargs...)[1]
    Tess = eltype(ess)
    Tout = promote_type(
        eltype(samples), typeof(oneunit(eltype(samples)) / sqrt(oneunit(Tess)))
    )
    out = similar(ess, Tout)
    # The median is the 1/2 quantile; reuse the quantile MCSE with p = 1/2.
    for (i, draws_i, ess_i) in zip(eachindex(out), eachslice(samples; dims=3), ess)
        out[i] = _mcse_quantile(vec(draws_i), 1//2, ess_i)
    end
    return out
end
|
||
function mcse_bm(x::AbstractVector{<:Real}; size::Int=floor(Int, sqrt(length(x)))) | ||
n = length(x) | ||
m = min(div(n, 2), size) | ||
m == size || @warn "batch size was reduced to $m" | ||
mcse = StatsBase.sem(Statistics.mean(@view(x[(i + 1):(i + m)])) for i in 0:m:(n - m)) | ||
return mcse | ||
# MCSE of the `p`-quantile of draws `x` given an effective sample size `Seff`.
function _mcse_quantile(x, p, Seff)
    ismissing(Seff) && return missing
    n = length(x)
    # The error of a quantile estimate is asymptotically normal; estimate its σ (the
    # MCSE) from 2 quadrature points xl and xu, chosen as quantiles so xu - xl = 2σ.
    # Work first in probability space (quantiles passed through the CDF), where the
    # error distribution of a quantile estimate is approximately Beta(α, β).
    α = Seff * p + 1
    β = Seff * (1 - p) + 1
    prob_upper = StatsFuns.betainvcdf(α, β, normcdf1)
    prob_lower = StatsFuns.betainvcdf(α, β, normcdfn1)
    # Map back to x-space via the inverse ECDF, sorting as little of x as possible.
    lo = max(floor(Int, prob_lower * n), 1)
    hi = min(ceil(Int, prob_upper * n), n)
    perm = partialsortperm(x, lo:hi)
    x_lower = x[first(perm)]
    x_upper = x[last(perm)]
    # The two quadrature points straddle 2σ, so σ is half their spread.
    return (x_upper - x_lower) / 2
end
|
||
function mcse_imse(x::AbstractVector{<:Real}) | ||
n = length(x) | ||
lags = [0, 1] | ||
ghat = StatsBase.autocov(x, lags) | ||
Ghat = sum(ghat) | ||
@inbounds value = Ghat + ghat[2] | ||
@inbounds for i in 2:2:(n - 2) | ||
lags[1] = i | ||
lags[2] = i + 1 | ||
StatsBase.autocov!(ghat, x, lags) | ||
Ghat = min(Ghat, sum(ghat)) | ||
Ghat > 0 || break | ||
value += 2 * Ghat | ||
end | ||
""" | ||
mcse_sbm(estimator, samples::AbstractArray{<:Union{Missing,Real},3}; batch_size) | ||
|
||
mcse = sqrt(value / n) | ||
Estimate the Monte Carlo standard errors (MCSE) of the `estimator` applied to `samples` | ||
using the subsampling bootstrap method (SBM).[^FlegalJones2011][^Flegal2012] | ||
|
||
return mcse | ||
end | ||
`samples` has shape `(draws, chains, parameters)`, and `estimator` must accept a vector of | ||
the same eltype as `samples` and return a real estimate. | ||
sethaxen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
function mcse_ipse(x::AbstractVector{<:Real}) | ||
n = length(x) | ||
lags = [0, 1] | ||
ghat = StatsBase.autocov(x, lags) | ||
@inbounds value = ghat[1] + 2 * ghat[2] | ||
@inbounds for i in 2:2:(n - 2) | ||
lags[1] = i | ||
lags[2] = i + 1 | ||
StatsBase.autocov!(ghat, x, lags) | ||
Ghat = sum(ghat) | ||
Ghat > 0 || break | ||
value += 2 * Ghat | ||
end | ||
`batch_size` indicates the size of the overlapping batches used to estimate the MCSE, | ||
defaulting to `floor(Int, sqrt(draws * chains))`. | ||
|
||
mcse = sqrt(value / n) | ||
!!! note | ||
SBM tends to underestimate the MCSE, especially for highly autocorrelated chains. | ||
SBM should only be used as a fallback when a specific [`mcse`](@ref) method for | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This begs the question whether we should include I lean towards including it with this caveat clearly documented. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we only want it to be used as a fallback, maybe we do not even want to add another function for it but also call it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good idea There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On second thought, it's useful to have it as a standalone method so if we want, we can compare its results with those of the specific estimators. But we can make the function internal and copy the documentation to
sethaxen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
`estimator` is not available and when the bulk- and tail- [`ess_rhat`](@ref) values | ||
sethaxen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
indicate low autocorrelation. | ||
|
||
return mcse | ||
[^FlegalJones2011]: Flegal JM, Jones GL. (2011) Implementing MCMC: estimating with confidence. | ||
Handbook of Markov Chain Monte Carlo. pp. 175-97. | ||
[pdf](http://faculty.ucr.edu/~jflegal/EstimatingWithConfidence.pdf) | ||
[^Flegal2012]: Flegal JM. (2012) Applicability of subsampling bootstrap methods in Markov chain Monte Carlo. | ||
Monte Carlo and Quasi-Monte Carlo Methods 2010. pp. 363-72. | ||
doi: [10.1007/978-3-642-27440-4_18](https://doi.org/10.1007/978-3-642-27440-4_18) | ||
""" | ||
function mcse_sbm(
    f,
    x::AbstractArray{<:Union{Missing,Real},3};
    batch_size::Int=floor(Int, sqrt(size(x, 1) * size(x, 2))),
)
    # Output eltype: input eltype promoted through division.
    T = promote_type(eltype(x), typeof(zero(eltype(x)) / 1))
    out = similar(x, T, (axes(x, 3),))
    # Pool the draws of all chains for each parameter before subsampling; of the
    # combination strategies benchmarked, this underestimated the MCSE the least
    # (see arviz-devs/arviz#1974).
    for (i, param_draws) in zip(eachindex(out), eachslice(x; dims=3))
        out[i] = _mcse_sbm(f, vec(param_draws); batch_size=batch_size)
    end
    return out
end
# Subsampling-bootstrap MCSE for a single parameter's pooled draws `x`.
#
# Applies `f` to every overlapping batch of `batch_size` consecutive draws and scales
# the (uncorrected) variance of these batch estimates by `batch_size / n`.
# Returns `missing` if any draw is missing.
function _mcse_sbm(f, x; batch_size)
    any(ismissing, x) && return missing  # idiomatic form of `x -> x === missing`
    n = length(x)
    i1 = firstindex(x)
    # Variance of `f` across all overlapping batches; views avoid copying each batch.
    v = Statistics.var(
        f(view(x, i:(i + batch_size - 1))) for i in i1:(i1 + n - batch_size);
        corrected=false,
    )
    return sqrt(v * (batch_size//n))
end
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
using Distributions, Statistics, StatsBase | ||
|
||
# Simulate an AR(1) process x[t] = φ * x[t-1] + ε[t] with ε[t] ~ Normal(0, σ),
# running along the first of the dimensions `n...`.
function ar1(φ::Real, σ::Real, n::Int...)
    T = float(Base.promote_eltype(φ, σ))
    noise = σ .* randn(T, n...)
    return accumulate((prev, ε) -> muladd(φ, prev, ε), noise; dims=1)
end
|
||
# Asymptotic sampling distribution of each estimator applied to iid draws from `dist`,
# used to compare MCSE estimates against theoretical values.
asymptotic_dist(::typeof(mean), dist) = Normal(mean(dist), std(dist))
function asymptotic_dist(::typeof(var), dist)
    # Var[s²] = σ⁴ (κ + 2), where κ is the excess kurtosis.
    location = var(dist)
    scale = location * sqrt(kurtosis(dist) + 2)
    return Normal(location, scale)
end
function asymptotic_dist(::typeof(std), dist)
    # Delta method applied to the sample-variance asymptotics.
    location = std(dist)
    scale = location * sqrt(kurtosis(dist) + 2) / 2
    return Normal(location, scale)
end
asymptotic_dist(::typeof(median), dist) = asymptotic_dist(Base.Fix2(quantile, 1//2), dist)
function asymptotic_dist(f::Base.Fix2{typeof(quantile),<:Real}, dist)
    p = f.x
    location = quantile(dist, p)
    # Standard quantile asymptotics: σ = √(p(1-p)) / f(q_p).
    scale = sqrt(p * (1 - p)) / pdf(dist, location)
    return Normal(location, scale)
end
function asymptotic_dist(::typeof(mad), dist::Normal)
    # Example 21.10 of Van der Vaart, A. W. (2000). Asymptotic Statistics.
    centered = Normal(zero(dist.μ), dist.σ)
    folded = truncated(centered; lower=0)
    location = median(folded)
    scale = 1 / (4 * pdf(centered, quantile(centered, 3//4)))
    return Normal(location, scale) / quantile(Normal(), 3//4)
end
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Necessary because otherwise our transformation discards the type of
x
, so aFloat32
eltypex
will get aFloat64
ESS/R-hat/MCSE.