Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Take non-equidistant samples and adapt mass matrix to Fisher information #114

Merged
merged 9 commits into from
Aug 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/ZigZagBoomerang.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Seed() = gen_seed(UInt64, 2)
include("types.jl")
include("common.jl")
include("oscn.jl")
include("invchol.jl")
include("dynamics.jl")
export ZigZag1d, Boomerang1d, ZigZag, FactBoomerang
const LocalZigZag = ZigZag
Expand Down
13 changes: 0 additions & 13 deletions src/dynamics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -125,16 +125,3 @@ function refresh!(rng, θ, F::Boomerang)
θ
end

# Use Geometry and pdmats if L is not provided
function ZigZagBoomerang.reflect!(∇ϕx, x, v, F::BouncyParticle{<:Any, <:Any, <:Any, <:AbstractPDMat}) # Seth's version
z = F.U * ∇ϕx
v .-= (2*dot(∇ϕx, v)/dot(∇ϕx, z)) * z
v
end
function ZigZagBoomerang.refresh!(rng, v, F::BouncyParticle{<:Any, <:Any, <:Any, <:AbstractPDMat})
ρ̄ = sqrt(1-F.ρ^2)
v .*= F.ρ
u = ρ̄*PDMats.unwhiten(F.U, randn(rng, length(v)))
v .+= u
v
end
28 changes: 28 additions & 0 deletions src/invchol.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
using LinearAlgebra
struct InvChol{T} <: AbstractPDMat{Float64}
R::T
InvChol(R::S) where {S<:UpperTriangular} = new{S}(R)
end
PDMats.dim(M::InvChol) = size(M.R,1)
function PDMats.unwhiten!(r::Vector{Float64}, M::InvChol{UpperTriangular{Float64, Matrix{Float64}}}, z::Vector{Float64})
r .= z
LinearAlgebra.naivesub!(M.R, r)
r
end
function LinearAlgebra.mul!(r::AbstractVector, M::InvChol, x::AbstractVector, alpha::Number, beta::Number)
@assert beta==0
@. r = alpha*x
LinearAlgebra.naivesub!(M.R', r)
LinearAlgebra.naivesub!(M.R, r)
r
end

function PDMats.whiten!(r::Vector{Float64}, M::InvChol{UpperTriangular{Float64, Matrix{Float64}}}, x::Vector{Float64})
mul!(r, M.R, x)
r
end
function Base.show(io::IOContext, m::MIME{Symbol("text/plain")}, M::InvChol)
print(io, "CholInv: ")
show(io, m, M.R)
end
Base.Matrix(M::InvChol) = M.R'*M.R
209 changes: 146 additions & 63 deletions src/not_fact_samplers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,12 @@ end
function ab(x, θ, C::LocalBound, ∇ϕx::AbstractVector, v, B::BouncyParticle)
(C.c + dot(θ, ∇ϕx), v, 2sqrt(length(θ))/C.c/norm(θ, 2))
end
function ab(x, θ, C::LocalBound, vdϕ::Number, v, B::BouncyParticle)
@assert vdϕ isa Number
(C.c + vdϕ, v, 2sqrt(length(θ))/C.c/norm(θ, 2))
end


function ab(x, θ, C::GlobalBound, ∇ϕx, v, B::Boomerang)
(sqrt(normsq(θ) + normsq((x - B.μ)))*C.c, 0.0, Inf)
end
ab(x, θ, c, Flow) = ab(x, θ, GlobalBound(c), nothing, nothing, Flow)
ab(x, θ, c, flow) = ab(x, θ, GlobalBound(c), nothing, nothing, flow)

function event(t, x, θ, Z::Union{BouncyParticle,Boomerang})
t, copy(x), copy(θ), nothing
Expand Down Expand Up @@ -150,64 +147,133 @@ function pdmp(∇ϕ!, t0, x0, θ0, T, c::Bound, Flow::Union{BouncyParticle, Boom
end


##################################
################################## ModernBPS ##################################
function local_speed(t, x, v, F)
1.0
end

# Use Geometry and pdmats if L is not provided
function ZigZagBoomerang.reflect!(∇ϕx, t, x, v, F::BouncyParticle{<:Any, <:Any, <:Any, <:AbstractPDMat}) # Seth's version
z = F.U * ∇ϕx # constant factor cancels
v .-= (2*dot(∇ϕx, v)/dot(∇ϕx, z)) * z
v
end
function ZigZagBoomerang.refresh!(rng, t, x, v, F::BouncyParticle{<:Any, <:Any, <:Any, <:AbstractPDMat})
ρ̄ = sqrt(1-F.ρ^2)
v .*= F.ρ
s = local_speed(t, x, v, F)
u = (s*ρ̄)*PDMats.unwhiten(F.U, randn(rng, length(v)))
v .+= u
record_rate(v, F)
end
function mass_adapt_init(M::InvChol)
Cholesky(M.R)
end

function mass_adapt_init(M)
M.diag
end

function mass_adapt!(M, m)
@. M.diag = 1/m
end
function mass_adapt!(M::InvChol, m)
M.R.data .= m.U.data
end

record_rate(θ, F) = norm(whiten(F.U, θ))

function ab(t, x, θ, V, C::LocalBound, vdϕ::Number, v, B::BouncyParticle)
(C.c + vdϕ, v, t + 2sqrt(length(θ))/C.c/V)
end

function next_event1(rng, u::Tuple, abc, flow)
t, x, v, V = u
a, b, Δ = abc
τ = t + poisson_time(a, b, rand(rng))
τrefresh = t + waiting_time_ref(rng, flow)/V
when, what = findmin((τ, Δ, τrefresh))
return when, (:bounce, :expire, :refresh)[what]
end

function pdmp_inner!(rng, dϕ::F1, ∇ϕ!::F2, ∇ϕx, t, x, θ, V, c::Bound, abc, (t′, action), Δrec, (acc, num),
flow::BouncyParticle, args...; subsample=false, oscn=false, factor=1.5, adapt=false) where {F1, F2}
if action == :invalid # invalidated event
θdϕ, v = dϕ(t, x, θ, flow, args...)
∇ϕ!(∇ϕx, t, x, θ, flow, args...)
abc = ab(t, x, θ, V, c, θdϕ, v, flow)
t′, action = next_event1(rng, (t, x, θ, V), abc, flow)
end

function pdmp_inner!(rng, dϕ::F1, ∇ϕ!::F2, ∇ϕx, t, x, θ, c::Bound, abc, (t′, renew), τref, (acc, num),
Flow::BouncyParticle, args...; subsample=false, oscn=false, factor=1.5, adapt=false) where {F1, F2}
while true
if τref < t′
t, _ = move_forward!(τref - t, t, x, θ, Flow)
refresh!(rng, θ, Flow)
θdϕ, v = dϕ(t, x, θ, args...)
#∇ϕx = grad_correct!(∇ϕx, x, Flow)
l = λ(θdϕ, Flow)
τref = t + waiting_time_ref(rng, Flow)
abc = ab(x, θ, c, θdϕ, v, Flow)
t′, renew = next_time(t, abc, rand(rng))
return t, (acc, num), c, abc, (t′, renew), τref
elseif renew
if t + Δrec/V <= t′ # record! (large V, more records)
τ = V\Δrec
t, _ = move_forward!(τ, t, x, θ, flow)
Δrec = 1/flow.λref
θdϕ, v = dϕ(t, x, θ, flow, args...)
l, lb = θdϕ, pos(abc[1] + abc[2]*τ)
if l > lb # check bounds on recordings
!adapt && error("Tuning parameter `c` too small.")
c *= factor
end
∇ϕ!(∇ϕx, t, x, θ, flow, args...)
abc = ab(t, x, θ, V, c, θdϕ, v, flow)
t′, action = next_event1(rng, (t, x, θ, V), abc, flow)
return t, V, (acc, num), c, abc, (t′, action), Δrec
end
Δrec = Δrec - (t′ - t)*V # coming closer to rec
@assert Δrec > 0.0
if action == :refresh
@assert Δrec >= 0
t, _ = move_forward!(t′ - t, t, x, θ, flow)
V = refresh!(rng, t, x, θ, flow)
θdϕ, v = dϕ(t, x, θ, flow, args...)
abc = ab(t, x, θ, V, c, θdϕ, v, flow)
t′, action = next_event1(rng, (t, x, θ, V), abc, flow)
elseif action == :expire
τ = t′ - t
t, _ = move_forward!(τ, t, x, θ, Flow)
θdϕ, v = dϕ(t, x, θ, args...)
#∇ϕx = grad_correct!(∇ϕx, x, Flow)
abc = ab(x, θ, c, θdϕ, v, Flow)
t′, renew = next_time(t, abc, rand(rng))
else
t, _ = move_forward!(τ, t, x, θ, flow)
θdϕ, v = dϕ(t, x, θ, flow, args...)
abc = ab(t, x, θ, V, c, θdϕ, v, flow)
t′, action = next_event1(rng, (t, x, θ, V), abc, flow)
else # action == :reflect
τ = t′ - t
t, _ = move_forward!(τ, t, x, θ, Flow)
θdϕ, v = dϕ(t, x, θ, args...)
#∇ϕx = grad_correct!(∇ϕx, x, Flow)
l, lb = λ(θdϕ, Flow), pos(abc[1] + abc[2]*τ)
t, _ = move_forward!(τ, t, x, θ, flow)
θdϕ, v = dϕ(t, x, θ, flow, args...)
l, lb = θdϕ, pos(abc[1] + abc[2]*τ)
num += 1
if rand(rng)*lb <= l
acc += 1
if l > lb
!adapt && error("Tuning parameter `c` too small.")
c *= factor
end
∇ϕ!(∇ϕx, t, x, args...)
∇ϕ!(∇ϕx, t, x, θ, flow, args...)
if oscn
@assert Flow.L == I
oscn!(rng, θ, ∇ϕx, Flow.ρ; normalize=false)
@assert flow.L == I
oscn!(rng, θ, ∇ϕx, flow.ρ; normalize=false)
V = record_rate(θ, flow)
else
reflect!(∇ϕx, x, θ, Flow)
reflect!(∇ϕx, t, x, θ, flow)
V = record_rate(θ, flow)
end
θdϕ, v = dϕ(t, x, θ, args...)
#∇ϕx = grad_correct!(∇ϕx, x, Flow)
abc = ab(x, θ, c, θdϕ, v, Flow)
t′, renew = next_time(t, abc, rand(rng))
!subsample && return t, (acc, num), c, abc, (t′, renew), τref
θdϕ, v = dϕ(t, x, θ, flow, args...)
abc = ab(t, x, θ, V, c, θdϕ, v, flow)
t′, action = next_event1(rng, (t, x, θ, V), abc, flow)
else
abc = ab(x, θ, c, θdϕ, v, Flow)
t′, renew = next_time(t, abc, rand(rng))
abc = ab(t, x, θ, V, c, θdϕ, v, flow)
t′, action = next_event1(rng, (t, x, θ, V), abc, flow)
end
end
end
end
"""
pdmp(dϕ, ∇ϕ!, t0, x0, θ0, T, c::Bound, Flow::BouncyParticle, args...; oscn=false, adapt=false, subsample=false, progress=false, progress_stops = 20, islocal = false, seed=Seed(), factor=2.0)
pdmp(dϕ, ∇ϕ!, t0, x0, θ0, T, c::Bound, flow::BouncyParticle, args...; oscn=false, adapt=false, progress=false, progress_stops = 20, islocal = false, seed=Seed(), factor=2.0)

The first directional derivative `dϕ[1]` tells me if I move up or down the potential. The second directional derivative `dϕ[2]` tells me how fast the first changes.
The gradient `∇ϕ!` tells me the surface I want to reflect on. Refreshes proportional to speed.
Keeps only samples at intervals proportional to those times.

The first directional derivative `dϕ[1]` tells me if I move up or down the potential. The second directional derivative `dϕ[2]` tells me how fast the first changes. The gradient `∇ϕ!` tells me the surface I want to reflect on.

dϕ = function (t, x, v, args...) # two directional derivatives
u = ForwardDiff.derivative(t -> -ℓ(x + t*v), Dual{:hSrkahPmmC}(0.0, 1.0))
Expand All @@ -229,54 +295,70 @@ The remaining arguments:
c = 50.0 # initial guess for the bound

# define BouncyParticle sampler (has two relevant parameters)
Z = BouncyParticle(∅, ∅, # ignored
Z = BouncyParticle(∅, ∅, # information about graphical structure
10.0, # momentum refreshment rate
0.95, # momentum correlation / only gradually change momentum in refreshment/momentum update
0.0, # ignored
nothing, # PDMats compatible inverse mass OR
I # left cholesky factor of momentum precision
)

trace, final, (acc, num), cs = @time pdmp(
dneglogp, # return first two directional derivatives of negative target log-likelihood in direction v
∇neglogp!, # return gradient of negative target log-likelihood
t0, x0, θ0, T, # initial state and duration
t0, x0, θ0, #initial state
T, # duration (Real) or number of samples (Int)
ZZB.LocalBound(c), # use Hessian information
Z; # sampler
oscn=false, # no orthogonal subspace pCR
adapt=true, # adapt bound c
adapt_mass=false # adapt PDiag U matrix to fisher information estimate
progress=true, # show progress bar
subsample=true # keep only samples at refreshment times
)

# to obtain direction change times and points of piecewise linear trace
t, x = ZigZagBoomerang.sep(trace)

"""
function pdmp(dϕ, ∇ϕ!, t0, x0, θ0, T, c::Bound, Flow::BouncyParticle, args...; oscn=false, adapt=false, subsample=false, progress=false, progress_stops = 20, islocal = false, seed=Seed(), factor=2.0)
function pdmp(dϕ, ∇ϕ!, t0, x0, θ0, T, c::Bound, flow::BouncyParticle, args...; iter_offset=0, adapt_mass=false, oscn=false, adapt=false, subsample=true, progress=false, progress_stops = 20, islocal = false, seed=Seed(), factor=2.0)
t, x, θ, ∇ϕx = t0, copy(x0), copy(θ0), copy(θ0)
subsample==true || throw(ArgumentError("`subsample=true` required."))
V = record_rate(θ, flow)
rng = Rng(seed)
Ξ = Trace(t0, x0, θ0, Flow)
τref = waiting_time_ref(rng, Flow)
θdϕ, v = dϕ(t, x, θ, args...)
#@assert v2 ≈ v
#@assert θdϕ ≈ dot(∇ϕx, θ)

#∇ϕx = grad_correct!(∇ϕx, x, Flow)
Ξ = Trace(t0, x0, θ0, flow)
θdϕ, v = dϕ(t, x, θ, flow, args...)
∇ϕ!(∇ϕx, t, x, θ, flow, args...)

num = acc = 0
#l = 0.0
abc = ab(x, θ, c, θdϕ, v, Flow)
abc = ab(t, x, θ, V, c, θdϕ, v, flow)
if progress
prg = Progress(progress_stops, 1)
else
prg = missing
end
stops = ismissing(prg) ? 0 : max(prg.n - 1, 0) # allow one stop for cleanup
tstop = T/stops

t′, renew = next_time(t, abc, rand(rng))
while t < T
t, (acc, num), c, abc, (t′, renew), τref = pdmp_inner!(rng, dϕ, ∇ϕ!, ∇ϕx, t, x, θ, c, abc, (t′, renew), τref, (acc, num), Flow, args...; oscn=oscn, subsample=subsample, factor=factor, adapt=adapt)
push!(Ξ, event(t, x, θ, Flow))
Δrec = 1/flow.λref
t′, action = next_event1(rng, (t, x, θ, V), abc, flow)
if adapt_mass
m = mass_adapt_init(flow.U)
end
iter = iter_offset
while T isa Int ? iter < T : t < T
t, V, (acc, num), c, abc, (t′, action), Δrec = pdmp_inner!(rng, dϕ, ∇ϕ!, ∇ϕx, t, x, θ, V, c, abc, (t′, action), Δrec, (acc, num), flow, args...; oscn=oscn, subsample=subsample, factor=factor, adapt=adapt)
push!(Ξ, event(t, x, θ, flow))
iter += 1
if adapt_mass # todo: make function
if m isa Cholesky
m.factors .*= sqrt(1-1/iter)
LinearAlgebra.lowrankupdate!(HC, ∇ϕx/sqrt(iter))
else
@. m = m + (∇ϕx^2 - m)/iter # running average
end
action = :invalid
PDMats.whiten!(flow.U, θ)
mass_adapt!(flow.U, m)
PDMats.unwhiten!(flow.U, θ)
end

if t > tstop
tstop += T/stops
Expand All @@ -287,6 +369,7 @@ function pdmp(dϕ, ∇ϕ!, t0, x0, θ0, T, c::Bound, Flow::BouncyParticle, args.
return Ξ, (t, x, θ), (acc, num), c
end

##########
wrap(f) = wrap_(f, methods(f)...)
wrap_(f, args...) = f
@inline wrap_(f, m) = m.nargs <= 4 ? Wrapper(f) : f
Expand All @@ -295,5 +378,5 @@ struct Wrapper{F}
end
(F::Wrapper)(y, t, x, θ, args...) = F.f(y, x, args...), nothing

pdmp(∇ϕ!, t0, x0, θ0, T, c, Flow::Union{BouncyParticle, Boomerang}, args...; nargs...) =
pdmp(Wrapper(∇ϕ!), t0, x0, θ0, T, GlobalBound(c), Flow, args...; nargs...)
pdmp(∇ϕ!, t0, x0, θ0, T, c, flow::Union{BouncyParticle, Boomerang}, args...; nargs...) =
pdmp(Wrapper(∇ϕ!), t0, x0, θ0, T, GlobalBound(c), flow, args...; nargs...)
Loading