Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rqatrend allocationfree #45

Merged
merged 8 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
/docs/Manifest.toml
/docs/build/
Manifest.toml
tmp/
20 changes: 12 additions & 8 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@ GDAL = "add2ef01-049f-52c4-9ee2-e494f65e021a"
GeoFormatTypes = "68eda718-8dee-11e9-39e7-89f7f65f511f"
Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
KML = "1284bf3a-1e3d-4f4e-a7a9-b9d235a28f35"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36"
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9"
Proj = "c94c279d-25a6-4763-9509-64d165bea63e"
Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689"
RecurrenceAnalysis = "639c3291-70d9-5ea2-8c5b-839eba1ee399"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
TimeseriesSurrogates = "c804724b-8c18-5caa-8579-6025a0767c70"
Expand All @@ -37,31 +39,29 @@ YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c"
Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99"

[compat]
julia = "1.10"
YAXArrayBase = "0.6, 0.7"
Glob = "1"
NetCDF="0.12"
Zarr = "0.9"
AllocCheck = "0.2.0"
ArchGDAL = "0.10"
ArgParse = "1"
BenchmarkTools = "1.6.0"
CairoMakie = "0.12"
ConstructionBase = "1"
DimensionalData = "0.29"
DiskArrays = "0.4"
DiskArrayTools = "0.1"
DiskArrays = "0.4"
Distances = "0.10"
Distributions = "0.25"
Extents = "0.1"
FillArrays = "1"
GDAL = "1"
GeoFormatTypes = "0.4"
Glob = "1"
julia = "1.10"
KML = "0.2"
LinearAlgebra = "1.10.0, 1.11.0"
LoggingExtras = "1"
Missings = "1"
NetCDF = "0.12"
Proj = "1"
Random = "1.10.0, 1.11.0"
Rasters = "0.12,0.13"
RecurrenceAnalysis = "2"
Statistics = "1"
Expand All @@ -70,9 +70,13 @@ UnicodePlots = "3"
YAXArrayBase = "0.6, 0.7"
YAXArrays = "0.5"
Zarr = "0.9"
julia = "1.10"

[extras]
AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a"
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

[targets]
test = ["Test"]
test = ["Test", "AllocCheck", "BenchmarkTools", "Random"]
5 changes: 3 additions & 2 deletions src/RQADeforestation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@ using Zarr
using Distributed: myid
using NetCDF

export gdalcube
export gdalcube, rqatrend

include("auxil.jl")
include("analysis.jl")
include("rqatrend.jl")
include("analysis.jl") # TODO what is still needed from analysis now that rqatrend is in its own file?
include("timestats.jl")


Expand Down
22 changes: 3 additions & 19 deletions src/analysis.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
using RecurrenceAnalysis
#using MultipleTesting
using Distances

const RA = RecurrenceAnalysis
"""
countvalid(xout, xin)
Expand Down Expand Up @@ -31,12 +30,12 @@ countvalid(cube; path=tempname() * ".zarr") = mapCube(countvalid, cube; indims=I


"""
rqatrend(xin, xout, thresh)
rqatrend(xout, xin, thresh)

Compute the RQA trend metric for the non-missing time steps of xin, and save it to xout.
`thresh` specifies the epsilon threshold of the Recurrence Plot computation
"""
function rqatrend(pix_trend, pix, thresh=2)
function rqatrend_recurrenceanalysis(pix_trend, pix, thresh=2)
#replace!(pix, -9999 => missing)
ts = collect(skipmissing(pix))
#@show length(ts)
Expand All @@ -51,28 +50,13 @@ function rqatrend_matrix(pix_trend, pix, thresh=2)
pix_trend .= RA.trend(rm)
end

"""rqatrend(cube;thresh=2, path=tempname() * ".zarr")

Compute the RQA trend metric for the datacube `cube` with the epsilon threshold `thresh`.
"""
function rqatrend(cube; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...)
@show outpath
mapCube(rqatrend, cube, thresh; indims=InDims("Time"), outdims=OutDims(; outtype=Float32, path=outpath, overwrite, kwargs...))
end

"""rqatrend(path::AbstractString; thresh=2, outpath=tempname()*".zarr")

Compute the RQA trend metric for the data that is available on `path`.
"""
rqatrend(path::AbstractString; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) = rqatrend(Cube(path); thresh, outpath, overwrite, kwargs...)


"""
rqatrend_shuffle(cube; thresh=2, path=tempname() * ".zarr", numshuffle=300)
Compute the RQA trend metric for shuffled time series of the data cube `cube` with the epsilon threshold `thresh` for `numshuffle` tries and save it into `path`.
"""
function rqatrend_shuffle(cube; thresh=2, path=tempname() * ".zarr", numshuffle=300)
# This should be made a random shuffle
# TODO this looks completely broken
sg = surrogenerator(collect(eachindex(water[overlap])), BlockShuffle(7, shift=true))
end

Expand Down
91 changes: 91 additions & 0 deletions src/rqatrend.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
using LinearAlgebra
using StaticArrays
using Distances


"""rqatrend(cube;thresh=2, path=tempname() * ".zarr")

Compute the RQA trend metric for the datacube `cube` with the epsilon threshold `thresh`.
"""
function rqatrend(cube; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...)
@show outpath
mapCube(rqatrend, cube, thresh; indims=InDims("Time"), outdims=OutDims(; outtype=Float32, path=outpath, overwrite, kwargs...))
end

"""rqatrend(path::AbstractString; thresh=2, outpath=tempname()*".zarr")

Compute the RQA trend metric for the data that is available on `path`.
"""
rqatrend(path::AbstractString; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) = rqatrend(Cube(path); thresh, outpath, overwrite, kwargs...)


"""
rqatrend(xout, xin, thresh)

Compute the RQA trend metric for the non-missing time steps of xin, and save it to xout.
`thresh` specifies the epsilon threshold of the Recurrence Plot computation
"""
function rqatrend(pix_trend, pix, thresh=2)
ts = collect(skipmissing(pix))
pix_trend .= rqatrend_impl(ts; thresh)
end


function rqatrend_impl(data; thresh=2, border=10, theiler=1, metric=Euclidean())
# simplified implementation of https://stats.stackexchange.com/a/370175 and https://github.com/joshday/OnlineStats.jl/blob/b89a99679b13e3047ff9c93a03c303c357931832/src/stats/linreg.jl
# x is the diagonal offset, y the percentage of local recurrence
# we compute the slope of a simple linear regression with bias from x to y
xs = 1+theiler : length(data)-border
x_mean = mean(xs)
xx_mean = sqmean_step1_range(xs) # mean(x*x for x in xs)

n = 0.0
y_mean = 0.0
xy_mean = 0.0
for x in xs
n += 1.0
y = tau_rr(data, x; thresh, metric)
y_mean = smooth(y_mean, y, inv(n))
xy_mean = smooth(xy_mean, x*y, inv(n))
end
A = SA[
xx_mean x_mean
x_mean 1.0
]
b = SA[xy_mean, y_mean]
# OnlineStats uses `Symmetric(A) \ b`, however this does not work for StaticArrays
# `cholesky(A) \ b` is recommended instead at discourse https://discourse.julialang.org/t/staticarrays-solve-symmetric-linear-system-seems-typeinstable/124634
# some timings show that there is no significant speedup when adding cholesky or doing plain static linear regression
# hence leaving it out for now
return 1000.0*(A \ b)[1] # slope
end

function tau_rr(y, d; thresh=2, metric=Euclidean())
# d starts counting at 1, so this is the middle diagonal (similar to tau_recurrence implementation, where the first index is always 1.0, i.e. represents the middle diagonal)
# for the computation starting at 0 is more intuitive
d -= 1
if d == 0
return 1.0
else
# `sum/n` is almost twice as fast as using `mean`
return @inbounds sum(evaluate(metric, y[i], y[i+d]) <= thresh for i in 1:length(y)-d) / (length(y)-d)
end
end

function sqmean_step1_range(xs)
a = first(xs)
b = last(xs)
return (sumofsquares(b) - sumofsquares(a - 1.0)) / length(xs)
end

sumofsquares(n) = n*(n+1.0)*(2.0*n+1.0)/6.0
sumofsquares(4)

"""
smooth(a, b, γ)

Weighted average of `a` and `b` with weight `γ`.

``(1 - γ) * a + γ * b``
"""
smooth(a, b, γ) = a + γ * (b - a)
9 changes: 9 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
using RQADeforestation
using Test
import AllocCheck
import Random
Random.seed!(1234)

@testset "RQADeforestation.jl" begin
# Write your tests here.

x = 1:0.01:30
y = sin.(x) + 0.1x + rand(length(x))

@test isapprox(RQADeforestation.rqatrend_impl(y; thresh=0.5), -0.11125611687816017)
@test isempty(AllocCheck.check_allocs(RQADeforestation.rqatrend_impl, Tuple{Vector{Float64}}))
end
Loading