From 8d9979fab02fbf629891d427c95828c33dbb6ee4 Mon Sep 17 00:00:00 2001 From: Stephan Sahm Date: Tue, 14 Jan 2025 16:10:10 +0100 Subject: [PATCH 1/8] adding new allocationfree rqatrend function --- Project.toml | 11 +++-- src/RQADeforestation.jl | 5 +- src/analysis.jl | 22 ++------- src/rqatrend.jl | 100 ++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 9 ++++ 5 files changed, 123 insertions(+), 24 deletions(-) create mode 100644 src/rqatrend.jl diff --git a/Project.toml b/Project.toml index db5957f..68af44a 100644 --- a/Project.toml +++ b/Project.toml @@ -28,6 +28,7 @@ NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9" Proj = "c94c279d-25a6-4763-9509-64d165bea63e" Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689" RecurrenceAnalysis = "639c3291-70d9-5ea2-8c5b-839eba1ee399" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" TimeseriesSurrogates = "c804724b-8c18-5caa-8579-6025a0767c70" @@ -42,13 +43,15 @@ YAXArrayBase = "0.6, 0.7" Glob = "1" NetCDF="0.12" Zarr = "0.9" +AllocCheck = "0.2.0" ArchGDAL = "0.10" ArgParse = "1" +BenchmarkTools = "1.6.0" CairoMakie = "0.12" ConstructionBase = "1" DimensionalData = "0.29" -DiskArrays = "0.4" DiskArrayTools = "0.1" +DiskArrays = "0.4" Distances = "0.10" Distributions = "0.25" Extents = "0.1" @@ -56,7 +59,6 @@ FillArrays = "1" GDAL = "1" GeoFormatTypes = "0.4" Glob = "1" -julia = "1.10" KML = "0.2" LoggingExtras = "1" Missings = "1" @@ -70,9 +72,12 @@ UnicodePlots = "3" YAXArrayBase = "0.6, 0.7" YAXArrays = "0.5" Zarr = "0.9" +julia = "1.10" [extras] +AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a" +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test"] +test = ["Test", "AllocCheck", "BenchmarkTools"] diff --git a/src/RQADeforestation.jl b/src/RQADeforestation.jl index cbb23e4..806ab43 100644 --- a/src/RQADeforestation.jl +++ b/src/RQADeforestation.jl @@ -8,10 +8,11 @@ using Zarr using Distributed: myid using NetCDF -export gdalcube +export gdalcube, rqatrend include("auxil.jl") -include("analysis.jl") +include("rqatrend.jl") +include("analysis.jl") # TODO what is still needed from analysis now that rqatrend is in its own file? include("timestats.jl") diff --git a/src/analysis.jl b/src/analysis.jl index 2af9bea..e4ab7a0 100644 --- a/src/analysis.jl +++ b/src/analysis.jl @@ -1,7 +1,6 @@ using RecurrenceAnalysis #using MultipleTesting using Distances - const RA = RecurrenceAnalysis """ countvalid(xout, xin) @@ -31,12 +30,12 @@ countvalid(cube; path=tempname() * ".zarr") = mapCube(countvalid, cube; indims=I """ -rqatrend(xin, xout, thresh) +rqatrend(xout, xin, thresh) Compute the RQA trend metric for the non-missing time steps of xin, and save it to xout. `thresh` specifies the epsilon threshold of the Recurrence Plot computation """ -function rqatrend(pix_trend, pix, thresh=2) +function rqatrend_recurrenceanalysis(pix_trend, pix, thresh=2) #replace!(pix, -9999 => missing) ts = collect(skipmissing(pix)) #@show length(ts) @@ -51,28 +50,13 @@ function rqatrend_matrix(pix_trend, pix, thresh=2) pix_trend .= RA.trend(rm) end -"""rqatrend(cube;thresh=2, path=tempname() * ".zarr") - -Compute the RQA trend metric for the datacube `cube` with the epsilon threshold `thresh`. -""" -function rqatrend(cube; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) - @show outpath - mapCube(rqatrend, cube, thresh; indims=InDims("Time"), outdims=OutDims(; outtype=Float32, path=outpath, overwrite, kwargs...)) -end - -"""rqatrend(path::AbstractString; thresh=2, outpath=tempname()*".zarr") - -Compute the RQA trend metric for the data that is available on `path`. -""" -rqatrend(path::AbstractString; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) = rqatrend(Cube(path); thresh, outpath, overwrite, kwargs...) - - """ rqatrend_shuffle(cube; thresh=2, path=tempname() * ".zarr", numshuffle=300) Compute the RQA trend metric for shuffled time series of the data cube `cube` with the epsilon threshold `thresh` for `numshuffle` tries and save it into `path`. """ function rqatrend_shuffle(cube; thresh=2, path=tempname() * ".zarr", numshuffle=300) # This should be made a random shuffle + # TODO this looks completely broken sg = surrogenerator(collect(eachindex(water[overlap])), BlockShuffle(7, shift=true)) end diff --git a/src/rqatrend.jl b/src/rqatrend.jl new file mode 100644 index 0000000..978467b --- /dev/null +++ b/src/rqatrend.jl @@ -0,0 +1,100 @@ +using LinearAlgebra +using StaticArrays +using Distances + + +"""rqatrend(cube;thresh=2, path=tempname() * ".zarr") + +Compute the RQA trend metric for the datacube `cube` with the epsilon threshold `thresh`. +""" +function rqatrend(cube; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) + @show outpath + mapCube(rqatrend, cube, thresh; indims=InDims("Time"), outdims=OutDims(; outtype=Float32, path=outpath, overwrite, kwargs...)) +end + +"""rqatrend(path::AbstractString; thresh=2, outpath=tempname()*".zarr") + +Compute the RQA trend metric for the data that is available on `path`. +""" +rqatrend(path::AbstractString; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) = rqatrend(Cube(path); thresh, outpath, overwrite, kwargs...) + + +""" +rqatrend(xout, xin, thresh) + +Compute the RQA trend metric for the non-missing time steps of xin, and save it to xout. +`thresh` specifies the epsilon threshold of the Recurrence Plot computation +""" +function rqatrend(pix_trend, pix, thresh=2) + #replace!(pix, -9999 => missing) + ts = collect(skipmissing(pix)) + #@show length(ts) + tau_pix = tau_rr(ts, thresh) + pix_trend .= RA._trend(tau_pix) +end + + +function rqatrend(pix_trend, pix, thresh=2) + ts = collect(skipmissing(pix)) + pix_trend .= rqatrend(ts; thresh) +end + + +function rqatrend(data; thresh=2, border=10, theiler=1, metric=Euclidean()) + # simplified implementation of https://stats.stackexchange.com/a/370175 + # x is the diagonal offset, y the percentage of local recurrence + # we compute the slope of a simple linear regression with bias from x to y + xs = 1+theiler : length(data)-border + x_mean = mean(xs) + xx_mean = sqmean_step1_range(xs) # mean(x*x for x in xs) + + n = 0.0 + y_mean = 0.0 + xy_mean = 0.0 + for x in xs + n += 1.0 + y = tau_rr(data, x; thresh, metric) + y_mean = smooth(y_mean, y, inv(n)) + xy_mean = smooth(xy_mean, x*y, inv(n)) + end + A = SA[ + xx_mean x_mean + x_mean 1.0 + ] + b = SA[xy_mean, y_mean] + # OnlineStats uses `Symmetric(A) \ b`, however this does not work for StaticArrays + # `cholesky(A) \ b` is recommended instead at discourse https://discourse.julialang.org/t/staticarrays-solve-symmetric-linear-system-seems-typeinstable/124634 + # some timings show that there is no significant speedup when adding cholesky or doing plain static linear regression + # hence leaving it out for now + return 1000.0*(A \ b)[1] # slope +end + +function tau_rr(y, d; thresh=2, metric=Euclidean()) + # d starts counting at 1, so this is the middle diagonal (similar to tau_recurrence implementation, where the first index is always 1.0, i.e. represents the middle diagonal) + # for the computation starting at 0 is more intuitive + d -= 1 + if d == 0 + return 1.0 + else + # `sum/n` is almost twice as fast as using `mean` + return @inbounds sum(evaluate(metric, y[i], y[i+d]) <= thresh for i in 1:length(y)-d) / (length(y)-d) + end +end + +function sqmean_step1_range(xs) + a = first(xs) + b = last(xs) + return (sumofsquares(b) - sumofsquares(a - 1.0)) / length(xs) +end + +sumofsquares(n) = n*(n+1.0)*(2.0*n+1.0)/6.0 +sumofsquares(4) + +""" + smooth(a, b, γ) + +Weighted average of `a` and `b` with weight `γ`. + +``(1 - γ) * a + γ * b`` +""" +smooth(a, b, γ) = a + γ * (b - a) diff --git a/test/runtests.jl b/test/runtests.jl index bcad455..29bf5ff 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,15 @@ using RQADeforestation using Test +import AllocCheck +import Random +Random.seed!(1234) @testset "RQADeforestation.jl" begin # Write your tests here. + + x = 1:0.01:30 + y = sin.(x) + 0.1x + rand(length(x)) + + @test isapprox(rqatrend(y; thresh=0.5), 0.1) + @test isempty(AllocCheck.check_allocs(rqatrend, Tuple{Vector{Float64}})) end From 25a6241d11c40b4280ed2f892224f89b773d4391 Mon Sep 17 00:00:00 2001 From: Stephan Sahm Date: Tue, 14 Jan 2025 16:11:30 +0100 Subject: [PATCH 2/8] ignored tmp folder --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 31d573e..734b1cf 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ /docs/Manifest.toml /docs/build/ Manifest.toml +tmp/ From ec28a41962b22f8d45b3cd571c8b40297f908871 Mon Sep 17 00:00:00 2001 From: Stephan Sahm Date: Tue, 14 Jan 2025 16:15:10 +0100 Subject: [PATCH 3/8] rebase to main got messed up a little --- Project.toml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Project.toml b/Project.toml index 68af44a..bbbf264 100644 --- a/Project.toml +++ b/Project.toml @@ -39,10 +39,6 @@ Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99" [compat] julia = "1.10" -YAXArrayBase = "0.6, 0.7" -Glob = "1" -NetCDF="0.12" -Zarr = "0.9" AllocCheck = "0.2.0" ArchGDAL = "0.10" ArgParse = "1" @@ -72,7 +68,6 @@ UnicodePlots = "3" YAXArrayBase = "0.6, 0.7" YAXArrays = "0.5" Zarr = "0.9" -julia = "1.10" [extras] AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a" From e0fe214d4f351550f2e5582eb820117af8b01a7d Mon Sep 17 00:00:00 2001 From: Stephan Sahm Date: Tue, 14 Jan 2025 16:31:20 +0100 Subject: [PATCH 4/8] small fixes --- Project.toml | 4 +++- test/runtests.jl | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index bbbf264..e348130 100644 --- a/Project.toml +++ b/Project.toml @@ -22,6 +22,7 @@ GDAL = "add2ef01-049f-52c4-9ee2-e494f65e021a" GeoFormatTypes = "68eda718-8dee-11e9-39e7-89f7f65f511f" Glob = "c27321d9-0574-5035-807b-f59d2c89b15c" KML = "1284bf3a-1e3d-4f4e-a7a9-b9d235a28f35" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36" Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9" @@ -38,7 +39,6 @@ YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c" Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99" [compat] -julia = "1.10" AllocCheck = "0.2.0" ArchGDAL = "0.10" ArgParse = "1" @@ -56,6 +56,7 @@ GDAL = "1" GeoFormatTypes = "0.4" Glob = "1" KML = "0.2" +LinearAlgebra = "1.11.0" LoggingExtras = "1" Missings = "1" NetCDF = "0.12" @@ -68,6 +69,7 @@ UnicodePlots = "3" YAXArrayBase = "0.6, 0.7" YAXArrays = "0.5" Zarr = "0.9" +julia = "1.10" [extras] AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a" diff --git a/test/runtests.jl b/test/runtests.jl index 29bf5ff..0c14fe6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,6 +10,6 @@ Random.seed!(1234) x = 1:0.01:30 y = sin.(x) + 0.1x + rand(length(x)) - @test isapprox(rqatrend(y; thresh=0.5), 0.1) + @test isapprox(rqatrend(y; thresh=0.5), -0.11125611687816017) @test isempty(AllocCheck.check_allocs(rqatrend, Tuple{Vector{Float64}})) end From 28ec59f9af172ff7fcb81daf423146c5aef36ccd Mon Sep 17 00:00:00 2001 From: Stephan Sahm Date: Tue, 14 Jan 2025 17:46:45 +0100 Subject: [PATCH 5/8] fix test and warnings --- Project.toml | 4 +++- src/rqatrend.jl | 13 ++----------- test/runtests.jl | 4 ++-- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/Project.toml b/Project.toml index e348130..e249205 100644 --- a/Project.toml +++ b/Project.toml @@ -61,6 +61,7 @@ LoggingExtras = "1" Missings = "1" NetCDF = "0.12" Proj = "1" +Random = "1.11.0" Rasters = "0.12,0.13" RecurrenceAnalysis = "2" Statistics = "1" @@ -75,6 +76,7 @@ julia = "1.10" AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [targets] -test = ["Test", "AllocCheck", "BenchmarkTools"] +test = ["Test", "AllocCheck", "BenchmarkTools", "Random"] diff --git a/src/rqatrend.jl b/src/rqatrend.jl index 978467b..d34e2e9 100644 --- a/src/rqatrend.jl +++ b/src/rqatrend.jl @@ -26,21 +26,12 @@ Compute the RQA trend metric for the non-missing time steps of xin, and save it `thresh` specifies the epsilon threshold of the Recurrence Plot computation """ function rqatrend(pix_trend, pix, thresh=2) - #replace!(pix, -9999 => missing) ts = collect(skipmissing(pix)) - #@show length(ts) - tau_pix = tau_rr(ts, thresh) - pix_trend .= RA._trend(tau_pix) + pix_trend .= rqatrend_impl(ts; thresh) end -function rqatrend(pix_trend, pix, thresh=2) - ts = collect(skipmissing(pix)) - pix_trend .= rqatrend(ts; thresh) -end - - -function rqatrend(data; thresh=2, border=10, theiler=1, metric=Euclidean()) +function rqatrend_impl(data; thresh=2, border=10, theiler=1, metric=Euclidean()) # simplified implementation of https://stats.stackexchange.com/a/370175 # x is the diagonal offset, y the percentage of local recurrence # we compute the slope of a simple linear regression with bias from x to y diff --git a/test/runtests.jl b/test/runtests.jl index 0c14fe6..f99e5dd 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,6 +10,6 @@ Random.seed!(1234) x = 1:0.01:30 y = sin.(x) + 0.1x + rand(length(x)) - @test isapprox(rqatrend(y; thresh=0.5), -0.11125611687816017) - @test isempty(AllocCheck.check_allocs(rqatrend, Tuple{Vector{Float64}})) + @test isapprox(RQADeforestation.rqatrend_impl(y; thresh=0.5), -0.11125611687816017) + @test isempty(AllocCheck.check_allocs(RQADeforestation.rqatrend_impl, Tuple{Vector{Float64}})) end From a8bd89228fc1e73daf856f1bcd53b0d0ada3ab3a Mon Sep 17 00:00:00 2001 From: Stephan Sahm Date: Tue, 14 Jan 2025 17:51:15 +0100 Subject: [PATCH 6/8] extra documentation link --- src/rqatrend.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rqatrend.jl b/src/rqatrend.jl index d34e2e9..795325d 100644 --- a/src/rqatrend.jl +++ b/src/rqatrend.jl @@ -32,7 +32,7 @@ end function rqatrend_impl(data; thresh=2, border=10, theiler=1, metric=Euclidean()) - # simplified implementation of https://stats.stackexchange.com/a/370175 + # simplified implementation of https://stats.stackexchange.com/a/370175 and https://github.com/joshday/OnlineStats.jl/blob/b89a99679b13e3047ff9c93a03c303c357931832/src/stats/linreg.jl # x is the diagonal offset, y the percentage of local recurrence # we compute the slope of a simple linear regression with bias from x to y xs = 1+theiler : length(data)-border From bc628f209ab691325921279c29282a4e73f40952 Mon Sep 17 00:00:00 2001 From: Stephan Sahm Date: Tue, 14 Jan 2025 17:56:50 +0100 Subject: [PATCH 7/8] test sysimage fixes linearalgebra to 1.10.7 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index e249205..87b9d0d 100644 --- a/Project.toml +++ b/Project.toml @@ -56,7 +56,7 @@ GDAL = "1" GeoFormatTypes = "0.4" Glob = "1" KML = "0.2" -LinearAlgebra = "1.11.0" +LinearAlgebra = "1.10.0, 1.11.0" LoggingExtras = "1" Missings = "1" NetCDF = "0.12" From b2480e2489068ad0f5af9aaf5f8da2156ed55cc1 Mon Sep 17 00:00:00 2001 From: Stephan Sahm Date: Wed, 15 Jan 2025 10:05:19 +0100 Subject: [PATCH 8/8] github action runs with julia 1.10 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 87b9d0d..c5c5f7b 100644 --- a/Project.toml +++ b/Project.toml @@ -61,7 +61,7 @@ LoggingExtras = "1" Missings = "1" NetCDF = "0.12" Proj = "1" -Random = "1.11.0" +Random = "1.10.0, 1.11.0" Rasters = "0.12,0.13" RecurrenceAnalysis = "2" Statistics = "1"