From 322b23f00aab21d2b017e38d885b8df52fcebee6 Mon Sep 17 00:00:00 2001 From: Stephan Sahm Date: Wed, 15 Jan 2025 11:16:39 +0100 Subject: [PATCH] Rqatrend allocationfree (#45) * adding new allocationfree rqatrend function * ignored tmp folder * rebase to main got messed up a little * small fixes * fix test and warnings * extra documentation link * test sysimage fixes linearalgebra to 1.10.7 * github action runs with julia 1.10 --- .gitignore | 1 + Project.toml | 20 +++++---- src/RQADeforestation.jl | 5 ++- src/analysis.jl | 22 ++-------- src/rqatrend.jl | 91 +++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 9 ++++ 6 files changed, 119 insertions(+), 29 deletions(-) create mode 100644 src/rqatrend.jl diff --git a/.gitignore b/.gitignore index 31d573e..734b1cf 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ /docs/Manifest.toml /docs/build/ Manifest.toml +tmp/ diff --git a/Project.toml b/Project.toml index db5957f..c5c5f7b 100644 --- a/Project.toml +++ b/Project.toml @@ -22,12 +22,14 @@ GDAL = "add2ef01-049f-52c4-9ee2-e494f65e021a" GeoFormatTypes = "68eda718-8dee-11e9-39e7-89f7f65f511f" Glob = "c27321d9-0574-5035-807b-f59d2c89b15c" KML = "1284bf3a-1e3d-4f4e-a7a9-b9d235a28f35" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36" Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9" Proj = "c94c279d-25a6-4763-9509-64d165bea63e" Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689" RecurrenceAnalysis = "639c3291-70d9-5ea2-8c5b-839eba1ee399" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" TimeseriesSurrogates = "c804724b-8c18-5caa-8579-6025a0767c70" @@ -37,18 +39,15 @@ YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c" Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99" [compat] -julia = "1.10" -YAXArrayBase = "0.6, 0.7" -Glob = "1" -NetCDF="0.12" -Zarr = "0.9" +AllocCheck = "0.2.0" ArchGDAL = "0.10" ArgParse = "1" +BenchmarkTools = "1.6.0" CairoMakie = "0.12" ConstructionBase = "1" DimensionalData = "0.29" -DiskArrays = "0.4" DiskArrayTools = "0.1" +DiskArrays = "0.4" Distances = "0.10" Distributions = "0.25" Extents = "0.1" @@ -56,12 +55,13 @@ FillArrays = "1" GDAL = "1" GeoFormatTypes = "0.4" Glob = "1" -julia = "1.10" KML = "0.2" +LinearAlgebra = "1.10.0, 1.11.0" LoggingExtras = "1" Missings = "1" NetCDF = "0.12" Proj = "1" +Random = "1.10.0, 1.11.0" Rasters = "0.12,0.13" RecurrenceAnalysis = "2" Statistics = "1" @@ -70,9 +70,13 @@ UnicodePlots = "3" YAXArrayBase = "0.6, 0.7" YAXArrays = "0.5" Zarr = "0.9" +julia = "1.10" [extras] +AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a" +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [targets] -test = ["Test"] +test = ["Test", "AllocCheck", "BenchmarkTools", "Random"] diff --git a/src/RQADeforestation.jl b/src/RQADeforestation.jl index cbb23e4..806ab43 100644 --- a/src/RQADeforestation.jl +++ b/src/RQADeforestation.jl @@ -8,10 +8,11 @@ using Zarr using Distributed: myid using NetCDF -export gdalcube +export gdalcube, rqatrend include("auxil.jl") -include("analysis.jl") +include("rqatrend.jl") +include("analysis.jl") # TODO what is still needed from analysis now that rqatrend is in its own file? include("timestats.jl") diff --git a/src/analysis.jl b/src/analysis.jl index 2af9bea..e4ab7a0 100644 --- a/src/analysis.jl +++ b/src/analysis.jl @@ -1,7 +1,6 @@ using RecurrenceAnalysis #using MultipleTesting using Distances - const RA = RecurrenceAnalysis """ countvalid(xout, xin) @@ -31,12 +30,12 @@ countvalid(cube; path=tempname() * ".zarr") = mapCube(countvalid, cube; indims=I """ -rqatrend(xin, xout, thresh) +rqatrend(xout, xin, thresh) Compute the RQA trend metric for the non-missing time steps of xin, and save it to xout. `thresh` specifies the epsilon threshold of the Recurrence Plot computation """ -function rqatrend(pix_trend, pix, thresh=2) +function rqatrend_recurrenceanalysis(pix_trend, pix, thresh=2) #replace!(pix, -9999 => missing) ts = collect(skipmissing(pix)) #@show length(ts) @@ -51,28 +50,13 @@ function rqatrend_matrix(pix_trend, pix, thresh=2) pix_trend .= RA.trend(rm) end -"""rqatrend(cube;thresh=2, path=tempname() * ".zarr") - -Compute the RQA trend metric for the datacube `cube` with the epsilon threshold `thresh`. -""" -function rqatrend(cube; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) - @show outpath - mapCube(rqatrend, cube, thresh; indims=InDims("Time"), outdims=OutDims(; outtype=Float32, path=outpath, overwrite, kwargs...)) -end - -"""rqatrend(path::AbstractString; thresh=2, outpath=tempname()*".zarr") - -Compute the RQA trend metric for the data that is available on `path`. -""" -rqatrend(path::AbstractString; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) = rqatrend(Cube(path); thresh, outpath, overwrite, kwargs...) - - """ rqatrend_shuffle(cube; thresh=2, path=tempname() * ".zarr", numshuffle=300) Compute the RQA trend metric for shuffled time series of the data cube `cube` with the epsilon threshold `thresh` for `numshuffle` tries and save it into `path`. """ function rqatrend_shuffle(cube; thresh=2, path=tempname() * ".zarr", numshuffle=300) # This should be made a random shuffle + # TODO this looks completely broken sg = surrogenerator(collect(eachindex(water[overlap])), BlockShuffle(7, shift=true)) end diff --git a/src/rqatrend.jl b/src/rqatrend.jl new file mode 100644 index 0000000..795325d --- /dev/null +++ b/src/rqatrend.jl @@ -0,0 +1,91 @@ +using LinearAlgebra +using StaticArrays +using Distances + + +"""rqatrend(cube;thresh=2, path=tempname() * ".zarr") + +Compute the RQA trend metric for the datacube `cube` with the epsilon threshold `thresh`. +""" +function rqatrend(cube; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) + @show outpath + mapCube(rqatrend, cube, thresh; indims=InDims("Time"), outdims=OutDims(; outtype=Float32, path=outpath, overwrite, kwargs...)) +end + +"""rqatrend(path::AbstractString; thresh=2, outpath=tempname()*".zarr") + +Compute the RQA trend metric for the data that is available on `path`. +""" +rqatrend(path::AbstractString; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) = rqatrend(Cube(path); thresh, outpath, overwrite, kwargs...) + + +""" +rqatrend(xout, xin, thresh) + +Compute the RQA trend metric for the non-missing time steps of xin, and save it to xout. +`thresh` specifies the epsilon threshold of the Recurrence Plot computation +""" +function rqatrend(pix_trend, pix, thresh=2) + ts = collect(skipmissing(pix)) + pix_trend .= rqatrend_impl(ts; thresh) +end + + +function rqatrend_impl(data; thresh=2, border=10, theiler=1, metric=Euclidean()) + # simplified implementation of https://stats.stackexchange.com/a/370175 and https://github.com/joshday/OnlineStats.jl/blob/b89a99679b13e3047ff9c93a03c303c357931832/src/stats/linreg.jl + # x is the diagonal offset, y the percentage of local recurrence + # we compute the slope of a simple linear regression with bias from x to y + xs = 1+theiler : length(data)-border + x_mean = mean(xs) + xx_mean = sqmean_step1_range(xs) # mean(x*x for x in xs) + + n = 0.0 + y_mean = 0.0 + xy_mean = 0.0 + for x in xs + n += 1.0 + y = tau_rr(data, x; thresh, metric) + y_mean = smooth(y_mean, y, inv(n)) + xy_mean = smooth(xy_mean, x*y, inv(n)) + end + A = SA[ + xx_mean x_mean + x_mean 1.0 + ] + b = SA[xy_mean, y_mean] + # OnlineStats uses `Symmetric(A) \ b`, however this does not work for StaticArrays + # `cholesky(A) \ b` is recommended instead at discourse https://discourse.julialang.org/t/staticarrays-solve-symmetric-linear-system-seems-typeinstable/124634 + # some timings show that there is no significant speedup when adding cholesky or doing plain static linear regression + # hence leaving it out for now + return 1000.0*(A \ b)[1] # slope +end + +function tau_rr(y, d; thresh=2, metric=Euclidean()) + # d starts counting at 1, so this is the middle diagonal (similar to tau_recurrence implementation, where the first index is always 1.0, i.e. represents the middle diagonal) + # for the computation starting at 0 is more intuitive + d -= 1 + if d == 0 + return 1.0 + else + # `sum/n` is almost twice as fast as using `mean` + return @inbounds sum(evaluate(metric, y[i], y[i+d]) <= thresh for i in 1:length(y)-d) / (length(y)-d) + end +end + +function sqmean_step1_range(xs) + a = first(xs) + b = last(xs) + return (sumofsquares(b) - sumofsquares(a - 1.0)) / length(xs) +end + +sumofsquares(n) = n*(n+1.0)*(2.0*n+1.0)/6.0 +sumofsquares(4) + +""" + smooth(a, b, γ) + +Weighted average of `a` and `b` with weight `γ`. + +``(1 - γ) * a + γ * b`` +""" +smooth(a, b, γ) = a + γ * (b - a) diff --git a/test/runtests.jl b/test/runtests.jl index bcad455..f99e5dd 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,15 @@ using RQADeforestation using Test +import AllocCheck +import Random +Random.seed!(1234) @testset "RQADeforestation.jl" begin # Write your tests here. + + x = 1:0.01:30 + y = sin.(x) + 0.1x + rand(length(x)) + + @test isapprox(RQADeforestation.rqatrend_impl(y; thresh=0.5), -0.11125611687816017) + @test isempty(AllocCheck.check_allocs(RQADeforestation.rqatrend_impl, Tuple{Vector{Float64}})) end