-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Redesign of MCSE #63
Redesign of MCSE #63
Changes from 37 commits
2d82222
e4b067b
34e0221
6839cd7
7dbda2d
b93ce5b
790a99b
9a1d3d5
c0d5a94
cf908af
93d121e
fe99356
d12648b
465afac
7dac85e
9407369
f95a066
88b6c41
899711e
01b8dbc
60d6441
2441bcb
8e3b06a
e7ca85a
2af67e9
da4ed63
b7cd495
4d55716
d9f6734
1c48266
f072b9e
bb47887
7f61907
a03cc2a
bac8a3c
652b86f
8dbae84
d9aff61
cced4be
ce9d427
787a05f
34f3771
d10740a
cf09e4e
39d74a9
a575fc8
b4eea8d
3738347
69fe0a1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ BDAESSMethod | |
|
||
```@docs | ||
mcse | ||
mcse_sbm | ||
``` | ||
|
||
## R⋆ diagnostic | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,72 +1,142 @@ | ||
const normcdf1 = 0.8413447460685429 # StatsFuns.normcdf(1), i.e. P(Z ≤ 1) for Z ~ Normal(0, 1)
const normcdfn1 = 0.15865525393145705 # StatsFuns.normcdf(-1), i.e. P(Z ≤ -1); used by _mcse_quantile
|
||
""" | ||
mcse(x::AbstractVector{<:Real}; method::Symbol=:imse, kwargs...) | ||
mcse(estimator, samples::AbstractArray{<:Union{Missing,Real}}; kwargs...) | ||
|
||
Estimate the Monte Carlo standard errors (MCSE) of the `estimator` applied to `samples` of | ||
shape `(draws, chains, parameters)`. | ||
sethaxen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
See also: [`ess_rhat`](@ref) | ||
|
||
Compute the Monte Carlo standard error (MCSE) of samples `x`. | ||
The optional argument `method` describes how the errors are estimated. Possible options are: | ||
## Estimators | ||
|
||
- `:bm` for batch means [^Glynn1991] | ||
- `:imse` initial monotone sequence estimator [^Geyer1992] | ||
- `:ipse` initial positive sequence estimator [^Geyer1992] | ||
`estimator` must accept a vector of the same eltype as `samples` and return a real estimate. | ||
sethaxen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
[^Glynn1991]: Glynn, P. W., & Whitt, W. (1991). Estimating the asymptotic variance with batch means. Operations Research Letters, 10(8), 431-435. | ||
For the following estimators, the effective sample size [`ess_rhat`](@ref) and an estimate | ||
of the asymptotic variance are used to compute the MCSE, and `kwargs` are forwarded to | ||
`ess_rhat`: | ||
- `Statistics.mean` | ||
- `Statistics.median` | ||
- `Statistics.std` | ||
- `Base.Fix2(Statistics.quantile, p::Real)` | ||
|
||
[^Geyer1992]: Geyer, C. J. (1992). Practical Markov Chain Monte Carlo. Statistical Science, 473-483. | ||
For an arbitrary estimator, the subsampling bootstrap method [`mcse_sbm`](@ref) is used, and | ||
sethaxen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
`kwargs` are forwarded to that function. | ||
""" | ||
function mcse(x::AbstractVector{<:Real}; method::Symbol=:imse, kwargs...) | ||
return if method === :bm | ||
mcse_bm(x; kwargs...) | ||
elseif method === :imse | ||
mcse_imse(x) | ||
elseif method === :ipse | ||
mcse_ipse(x) | ||
else | ||
throw(ArgumentError("unsupported MCSE method $method")) | ||
# Fallback for arbitrary estimators: delegate to the subsampling bootstrap method.
mcse(f, x::AbstractArray{<:Union{Missing,Real},3}; kwargs...) = mcse_sbm(f, x; kwargs...)

# MCSE of the sample mean: pooled standard deviation divided by √ESS, where the
# effective sample size is the one tailored to the mean.
function mcse(
    ::typeof(Statistics.mean), samples::AbstractArray{<:Union{Missing,Real},3}; kwargs...
)
    ess = ess_rhat(Statistics.mean, samples; kwargs...)[1]
    pooled_sd = dropdims(Statistics.std(samples; dims=(1, 2)); dims=(1, 2))
    return pooled_sd ./ sqrt.(ess)
end
function mcse(
    ::typeof(Statistics.std), samples::AbstractArray{<:Union{Missing,Real},3}; kwargs...
)
    # Centered, squared draws serve as an expectand proxy: their mean is the variance.
    centered_sq = (samples .- Statistics.mean(samples; dims=(1, 2))) .^ 2
    ess = ess_rhat(Statistics.mean, centered_sq; kwargs...)[1]
    # The asymptotic variance of the sample variance is Var[var] = E[μ₄] - E[var]²,
    # where μ₄ is the 4th central moment. By the delta method,
    # Var[std] = Var[var] / (4 E[var]) = (E[μ₄]/E[var] - E[var]) / 4.
    # See e.g. Chapter 3 of Van der Vaart, A. W. (2000). Asymptotic Statistics.
    mean_var = dropdims(Statistics.mean(centered_sq; dims=(1, 2)); dims=(1, 2))
    mean_moment4 = dropdims(Statistics.mean(abs2, centered_sq; dims=(1, 2)); dims=(1, 2))
    return @. sqrt((mean_moment4 / mean_var - mean_var) / ess) / 2
end
function mcse(
    f::Base.Fix2{typeof(Statistics.quantile),<:Real},
    samples::AbstractArray{<:Union{Missing,Real},3};
    kwargs...,
)
    p = f.x
    # ESS tailored to the quantile estimator.
    ess = ess_rhat(f, samples; kwargs...)[1]
    Tess = eltype(ess)
    # Output eltype: input eltype promoted through division by √ESS.
    Tout = promote_type(
        eltype(samples), typeof(oneunit(eltype(samples)) / sqrt(oneunit(Tess)))
    )
    out = similar(ess, Tout)
    for (i, draws_i, ess_i) in zip(eachindex(out), eachslice(samples; dims=3), ess)
        out[i] = _mcse_quantile(vec(draws_i), p, ess_i)
    end
    return out
end
function mcse(
    ::typeof(Statistics.median), samples::AbstractArray{<:Union{Missing,Real},3}; kwargs...
)
    # ESS tailored to the median estimator.
    ess = ess_rhat(Statistics.median, samples; kwargs...)[1]
    Tess = eltype(ess)
    Tout = promote_type(
        eltype(samples), typeof(oneunit(eltype(samples)) / sqrt(oneunit(Tess)))
    )
    out = similar(ess, Tout)
    # The median is the 1/2 quantile; reuse the quantile MCSE with p = 1/2.
    for (i, draws_i, ess_i) in zip(eachindex(out), eachslice(samples; dims=3), ess)
        out[i] = _mcse_quantile(vec(draws_i), 1//2, ess_i)
    end
    return out
end
|
||
function mcse_bm(x::AbstractVector{<:Real}; size::Int=floor(Int, sqrt(length(x)))) | ||
n = length(x) | ||
m = min(div(n, 2), size) | ||
m == size || @warn "batch size was reduced to $m" | ||
mcse = StatsBase.sem(Statistics.mean(@view(x[(i + 1):(i + m)])) for i in 0:m:(n - m)) | ||
return mcse | ||
# MCSE of the `p`-quantile of draws `x` given an effective sample size `Seff`.
function _mcse_quantile(x, p, Seff)
    ismissing(Seff) && return missing
    n = length(x)
    # The error of a quantile estimate is asymptotically normal; estimate its σ (the
    # MCSE) from 2 quadrature points xl and xu, chosen as quantiles so xu - xl = 2σ.
    # Work first in probability space (quantiles passed through the CDF), where the
    # error distribution of a quantile estimate is approximately Beta(α, β).
    α = Seff * p + 1
    β = Seff * (1 - p) + 1
    prob_upper = StatsFuns.betainvcdf(α, β, normcdf1)
    prob_lower = StatsFuns.betainvcdf(α, β, normcdfn1)
    # Map back to x-space via the inverse ECDF, sorting as little of x as possible.
    lo = max(floor(Int, prob_lower * n), 1)
    hi = min(ceil(Int, prob_upper * n), n)
    perm = partialsortperm(x, lo:hi)
    x_lower = x[first(perm)]
    x_upper = x[last(perm)]
    # The two quadrature points straddle 2σ, so σ is half their spread.
    return (x_upper - x_lower) / 2
end
|
||
function mcse_imse(x::AbstractVector{<:Real}) | ||
n = length(x) | ||
lags = [0, 1] | ||
ghat = StatsBase.autocov(x, lags) | ||
Ghat = sum(ghat) | ||
@inbounds value = Ghat + ghat[2] | ||
@inbounds for i in 2:2:(n - 2) | ||
lags[1] = i | ||
lags[2] = i + 1 | ||
StatsBase.autocov!(ghat, x, lags) | ||
Ghat = min(Ghat, sum(ghat)) | ||
Ghat > 0 || break | ||
value += 2 * Ghat | ||
end | ||
""" | ||
mcse_sbm(estimator, samples::AbstractArray{<:Union{Missing,Real},3}; batch_size) | ||
|
||
mcse = sqrt(value / n) | ||
Estimate the Monte Carlo standard errors (MCSE) of the `estimator` applied to `samples` | ||
using the subsampling bootstrap method (SBM).[^FlegalJones2011][^Flegal2012] | ||
|
||
return mcse | ||
end | ||
`samples` has shape `(draws, chains, parameters)`, and `estimator` must accept a vector of | ||
the same eltype as `samples` and return a real estimate. | ||
sethaxen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
function mcse_ipse(x::AbstractVector{<:Real}) | ||
n = length(x) | ||
lags = [0, 1] | ||
ghat = StatsBase.autocov(x, lags) | ||
@inbounds value = ghat[1] + 2 * ghat[2] | ||
@inbounds for i in 2:2:(n - 2) | ||
lags[1] = i | ||
lags[2] = i + 1 | ||
StatsBase.autocov!(ghat, x, lags) | ||
Ghat = sum(ghat) | ||
Ghat > 0 || break | ||
value += 2 * Ghat | ||
end | ||
`batch_size` indicates the size of the overlapping batches used to estimate the MCSE, | ||
defaulting to `floor(Int, sqrt(draws * chains))`. | ||
|
||
mcse = sqrt(value / n) | ||
!!! note | ||
SBM tends to underestimate the MCSE, especially for highly autocorrelated chains. | ||
SBM should only be used as a fallback when a specific [`mcse`](@ref) method for | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This begs the question whether we should include I lean towards including it with this caveat clearly documented. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we only want it to be used as a fallback, maybe we do not even want to add another function for it but also call it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good idea There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On second thought, it's useful to have it as a standalone method so if we want, we can compare its results with those of the specific estimators. But we can make the function internal and copy the documentation to
sethaxen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
`estimator` is not available and when the bulk- and tail- [`ess_rhat`](@ref) values | ||
sethaxen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
indicate low autocorrelation. | ||
|
||
return mcse | ||
[^FlegalJones2011]: Flegal JM, Jones GL. (2011) Implementing MCMC: estimating with confidence. | ||
Handbook of Markov Chain Monte Carlo. pp. 175-97. | ||
[pdf](http://faculty.ucr.edu/~jflegal/EstimatingWithConfidence.pdf) | ||
[^Flegal2012]: Flegal JM. (2012) Applicability of subsampling bootstrap methods in Markov chain Monte Carlo. | ||
Monte Carlo and Quasi-Monte Carlo Methods 2010. pp. 363-72. | ||
doi: [10.1007/978-3-642-27440-4_18](https://doi.org/10.1007/978-3-642-27440-4_18) | ||
""" | ||
function mcse_sbm(
    f,
    x::AbstractArray{<:Union{Missing,Real},3};
    batch_size::Int=floor(Int, sqrt(size(x, 1) * size(x, 2))),
)
    # Output eltype: input eltype promoted through division.
    T = promote_type(eltype(x), typeof(zero(eltype(x)) / 1))
    out = similar(x, T, (axes(x, 3),))
    # Pool the draws of all chains for each parameter before subsampling; of the
    # combination strategies benchmarked, this underestimated the MCSE the least
    # (see arviz-devs/arviz#1974).
    for (i, param_draws) in zip(eachindex(out), eachslice(x; dims=3))
        out[i] = _mcse_sbm(f, vec(param_draws); batch_size=batch_size)
    end
    return out
end
# Subsampling-bootstrap MCSE for a single parameter's pooled draws `x`.
#
# Applies `f` to every overlapping batch of `batch_size` consecutive draws and scales
# the (uncorrected) variance of these batch estimates by `batch_size / n`.
# Returns `missing` if any draw is missing.
function _mcse_sbm(f, x; batch_size)
    any(ismissing, x) && return missing  # idiomatic form of `x -> x === missing`
    n = length(x)
    i1 = firstindex(x)
    # Variance of `f` across all overlapping batches; views avoid copying each batch.
    v = Statistics.var(
        f(view(x, i:(i + batch_size - 1))) for i in i1:(i1 + n - batch_size);
        corrected=false,
    )
    return sqrt(v * (batch_size//n))
end
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
using Distributions, Statistics, StatsBase | ||
|
||
# Simulate an AR(1) process x[t] = φ * x[t-1] + ε[t] with ε[t] ~ Normal(0, σ),
# running along the first of the dimensions `n...`.
function ar1(φ::Real, σ::Real, n::Int...)
    T = float(Base.promote_eltype(φ, σ))
    noise = σ .* randn(T, n...)
    return accumulate((prev, ε) -> muladd(φ, prev, ε), noise; dims=1)
end
|
||
# Asymptotic sampling distribution of each estimator applied to iid draws from `dist`,
# used to compare MCSE estimates against theoretical values.
asymptotic_dist(::typeof(mean), dist) = Normal(mean(dist), std(dist))
function asymptotic_dist(::typeof(var), dist)
    # Var[s²] = σ⁴ (κ + 2), where κ is the excess kurtosis.
    location = var(dist)
    scale = location * sqrt(kurtosis(dist) + 2)
    return Normal(location, scale)
end
function asymptotic_dist(::typeof(std), dist)
    # Delta method applied to the sample-variance asymptotics.
    location = std(dist)
    scale = location * sqrt(kurtosis(dist) + 2) / 2
    return Normal(location, scale)
end
asymptotic_dist(::typeof(median), dist) = asymptotic_dist(Base.Fix2(quantile, 1//2), dist)
function asymptotic_dist(f::Base.Fix2{typeof(quantile),<:Real}, dist)
    p = f.x
    location = quantile(dist, p)
    # Standard quantile asymptotics: σ = √(p(1-p)) / f(q_p).
    scale = sqrt(p * (1 - p)) / pdf(dist, location)
    return Normal(location, scale)
end
function asymptotic_dist(::typeof(mad), dist::Normal)
    # Example 21.10 of Van der Vaart, A. W. (2000). Asymptotic Statistics.
    centered = Normal(zero(dist.μ), dist.σ)
    folded = truncated(centered; lower=0)
    location = median(folded)
    scale = 1 / (4 * pdf(centered, quantile(centered, 3//4)))
    return Normal(location, scale) / quantile(Normal(), 3//4)
end
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Necessary because otherwise our transformation discards the type of
x
, so aFloat32
eltypex
will get aFloat64
ESS/R-hat/MCSE.