Dispersion and reverse dispersion probability estimators #96

Merged: 15 commits, Sep 26, 2022
8 changes: 7 additions & 1 deletion docs/src/complexity_measures.md
@@ -1,7 +1,13 @@
# Complexity measures
# [Complexity measures](@id complexity_measures)

## Sample entropy

## Approximate entropy

## Reverse dispersion entropy

```@docs
reverse_dispersion
```

## Disequilibrium
2 changes: 1 addition & 1 deletion docs/src/entropies.md
@@ -33,4 +33,4 @@ entropy_permutation
entropy_spatial_permutation
entropy_wavelet
entropy_dispersion
```
```
59 changes: 59 additions & 0 deletions docs/src/examples.md
@@ -145,3 +145,62 @@ for a in (ax, ay, az); axislegend(a); end
for a in (ax, ay); hidexdecorations!(a; grid=false); end
fig
```

## [Dispersion and reverse dispersion entropy](@id dispersion_examples)

Here we reproduce parts of figure 3 in Li et al. (2019)[^Li2019], computing reverse and regular dispersion entropy for a time series consisting of normally distributed noise with a single spike in the middle of the signal. We compute the entropies over a range of subsets of the data, using a sliding window of 70 data points, stepping the window 10 time steps at a time.

Note: the results here are not exactly the same as in the original paper, because Li et
al. (2019) base their examples on randomly generated numbers and do not provide code that
specifies the random number seeds.

```@example
using Entropies, DynamicalSystems, Random, CairoMakie, Distributions

n = 1000
ts = 1:n
x = [i == n ÷ 2 ? 50.0 : 0.0 for i in ts]
rng = Random.default_rng()
s = rand(rng, Normal(0, 1), n)
y = x .+ s

ws = 70
windows = [t:t+ws for t in 1:10:n-ws]
rdes = zeros(length(windows))
des = zeros(length(windows))

m, c = 2, 6
scheme = GaussianSymbolization(c)
est_de = Dispersion(s = scheme, m = m, τ = 1, normalize = true)

for (i, window) in enumerate(windows)
    rdes[i] = reverse_dispersion(y[window];
        s = scheme, m = m, τ = 1, normalize = true)
    des[i] = entropy_renyi(y[window], est_de)
end

fig = Figure()

a1 = Axis(fig[1,1]; xlabel = "Time step", ylabel = "Value")
lines!(a1, ts, y)

a2 = Axis(fig[2, 1]; xlabel = "Time step", ylabel = "Value")
p_rde = scatterlines!([first(w) for w in windows], rdes,
label = "Reverse dispersion entropy",
color = :black,
markercolor = :black, marker = '●')
p_de = scatterlines!([first(w) for w in windows], des,
label = "Dispersion entropy",
color = :red,
markercolor = :red, marker = 'x', markersize = 20)

axislegend(a2; position = :rc)
ylims!(a2, 0, max(maximum(rdes), maximum(des), 1))
fig
```

[^Rostaghi2016]: Rostaghi, M., & Azami, H. (2016). Dispersion entropy: A measure for time-series analysis. IEEE Signal Processing Letters, 23(5), 610-614.
[^Li2019]: Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new
complexity measure for sensor signal. Sensors, 19(23), 5203.
2 changes: 1 addition & 1 deletion docs/src/index.md
@@ -42,7 +42,7 @@ Thus, any of the implemented [probabilities estimators](@ref estimators) can be


### Complexity measures
Other complexity measures, which strictly speaking don't compute entropies, and may or may not explicitly compute probability distributions, appear in the [Complexity measures](@ref) section.
Other complexity measures, which strictly speaking don't compute entropies, and may or may not explicitly compute probability distributions, appear in the [Complexity measures](@ref complexity_measures) section.


## Input data
6 changes: 6 additions & 0 deletions docs/src/probabilities.md
@@ -24,6 +24,12 @@ SymbolicPermutation
SpatialSymbolicPermutation
```

## Dispersion (symbolic)

```@docs
Dispersion
```

## Visitation frequency (binning)

```@docs
2 changes: 2 additions & 0 deletions src/Entropies.jl
@@ -16,6 +16,8 @@ include("symbolization/symbolize.jl")
include("probabilities.jl")
include("probabilities_estimators/probabilities_estimators.jl")
include("entropies/entropies.jl")
include("complexity_measures/complexity_measures.jl")

include("deprecations.jl")


1 change: 1 addition & 0 deletions src/complexity_measures/complexity_measures.jl
@@ -0,0 +1 @@
include("reverse_dispersion_entropy.jl")
91 changes: 91 additions & 0 deletions src/complexity_measures/reverse_dispersion_entropy.jl
@@ -0,0 +1,91 @@
export reverse_dispersion

function distance_to_whitenoise(p::Probabilities, n_classes, m; normalize = false)
    # We can safely skip non-occurring symbols, because they don't contribute
    # to the sum in eq. 3 in Li et al. (2019).
    Hrde = sum(abs2, p) - 1/(n_classes^m)

    if normalize
        # The factor `f` considers *all* possible symbols (also non-occurring).
        f = n_classes^m
        return Hrde / (1 - (1/f))
    else
        return Hrde
    end
end
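
A quick sanity check of the helper above (a hypothetical snippet, not part of this diff; it assumes `distance_to_whitenoise` is reachable as `Entropies.distance_to_whitenoise`, since it is not exported, and that the `Probabilities` constructor accepts a pre-normalized vector): a perfectly flat distribution over all `n_classes^m` dispersion patterns should be at distance zero from white noise, with or without normalization.

```julia
# Hypothetical sanity check, not part of this PR.
using Entropies

c, m = 3, 2
# Flat distribution over all c^m possible dispersion patterns.
p_flat = Probabilities(fill(1 / c^m, c^m))

Entropies.distance_to_whitenoise(p_flat, c, m)                    # ≈ 0.0
Entropies.distance_to_whitenoise(p_flat, c, m; normalize = true)  # ≈ 0.0
```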

"""
    reverse_dispersion(x::AbstractVector; s = GaussianSymbolization(5), m = 2, τ = 1,
                       normalize = true)
Member:
Having so many optional arguments is inconvenient. If I want to change just normalize, I need to input all prior 4. I'd suggest to make everything besides x a keyword argument. I'd also recommend to not use s, but rather something descriptive like symbolization.

Member Author:
These should indeed be keyword arguments. I missed the semi-colon.



Compute the reverse dispersion entropy complexity measure (Li et al., 2019)[^Li2019].

## Algorithm

Li et al. (2019)[^Li2019] define the reverse dispersion entropy as

```math
H_{rde} = \\sum_{i = 1}^{c^m} \\left(p_i - \\dfrac{1}{{c^m}} \\right)^2.
```

where the probabilities ``p_i`` are obtained precisely as for the [`Dispersion`](@ref)
probability estimator. Relative frequencies of dispersion patterns are computed using the
symbolization scheme `s`, which defaults to symbolization using the normal cumulative
distribution function (NCDF), as implemented by [`GaussianSymbolization`](@ref), using
embedding dimension `m` and embedding delay `τ`.
Recommended parameter values[^Li2019] are `m ∈ [2, 3]`, `τ = 1` for the embedding, and
`c ∈ [3, 4, …, 8]` categories for the Gaussian mapping. If `normalize == true`, then
the reverse dispersion entropy is normalized to `[0, 1]`.

The minimum value of ``H_{rde}`` is zero, which occurs precisely when the dispersion
pattern distribution is flat, i.e. when all ``p_i`` are equal to ``1/c^m``.
Because ``H_{rde} \\geq 0``, it can therefore be interpreted as a measure of how far
the dispersion pattern probability distribution is from white noise.

## Example

```jldoctest reverse_dispersion_example; setup = :(using Entropies)
julia> x = repeat([0.5, 0.7, 0.1, -1.0, 1.11, 2.22, 4.4, 0.2, 0.2, 0.1], 10);

julia> c, m = 3, 5;

julia> reverse_dispersion(x, s = GaussianSymbolization(c = c), m = m, normalize = false)
0.11372331532921814
```
Member:
I am not sure about this. I mean, how much does it help really? It just shows the function call, and that it outputs a number, which is anyway understood by the first sentence of the docstring. Unlike Julia base functions, whose output can be deduced, and hence these tiny snippets actually make sure the user gets what's going on, here it's not like a user could deduce the 0.11.... Furthermore, there is already an example in the docs.

More importantly, what are the principles that decide which methods have examples in their docstrings and which do not? Are we perhaps confusing the user? One of the principles of good documentation is that information should be where it is expected to be. So, are we creating the expectation that examples will be found in the docpage "Examples", or at the end of the docstrings?

My vote is to stick with one means of showing examples, and my vote for that is the dedicated examples page.


P.s.: The argument of using this as a means of tests is totally unconvincing for me, that's why we write tests in the test folder anyways, so if this tests what is already in the test folder, then why test it twice, and if it tests something that isn't in the test folder, well, why wasn't this thing in the test folder in the first place.

Member Author:
From the Documenter.jl docs:

It is recommended that as many of a package's examples as possible be runnable by Documenter's doctest. This can help to avoid documentation examples from becoming outdated, incorrect, or misleading.

One can discuss whether this advice is good or not, given precisely what you said:

that's why we write tests in the test folder anyways, so if this tests what is already in the test folder, then why test it twice, and if it tests something that isn't in the test folder, well, why wasn't this thing in the test folder in the first place.

There is a balance to be struck between 1) the expectation a user has of where examples are found and 2) the ease of use and/or learning curve.

I'm fine just having the extended examples in the online documentation pages, but it is also slightly annoying (for me) to have to open the browser and read through potentially many documentation pages to find a simple runnable example. I very much like the SciPy approach, for example, where most if not all methods have a simple runnable example AND some of these methods are exposed in more detail elsewhere in the docs. On the opposite end of the spectrum, you have packages like StatsBase.jl which have very few examples, except in some methods (where, funnily, for the example I looked at, they don't even use jldoctest, just a regular Julia code block).

I do straight-forwardly agree that for functions as simple as reverse_dispersion, the runnable example is a bit redundant. The example in the doc page should be informative enough.


!!! note "A clarification on notation"

With ambiguous notation, Li et al. claim that

``H_{rde} = \\sum_{i = 1}^{c^m} \\left(p_i - \\dfrac{1}{{c^m}} \\right)^2 = \\sum_{i = 1}^{c^m} p_i^2 - \\frac{1}{c^m}.``

But on the right-hand side of the equality, does the constant term appear within or
outside the sum? Using that ``P`` is a probability distribution by
construction (in step 4), we see that the constant must appear *outside* the sum:

```math
\\begin{aligned}
H_{rde} &= \\sum_{i = 1}^{c^m} \\left(p_i - \\dfrac{1}{{c^m}} \\right)^2
= \\sum_{i=1}^{c^m} \\left( p_i^2 - \\frac{2p_i}{c^m} + \\frac{1}{c^{2m}} \\right) \\\\
&= \\left( \\sum_{i=1}^{c^m} p_i^2 \\right) - \\left(\\sum_{i=1}^{c^m} \\frac{2p_i}{c^m}\\right) + \\left( \\sum_{i=1}^{c^m} \\dfrac{1}{{c^{2m}}} \\right) \\\\
&= \\left( \\sum_{i=1}^{c^m} p_i^2 \\right) - \\left(\\frac{2}{c^m} \\sum_{i=1}^{c^m} p_i \\right) + \\dfrac{c^m}{c^{2m}} \\\\
&= \\left( \\sum_{i=1}^{c^m} p_i^2 \\right) - \\frac{2}{c^m} (1) + \\dfrac{1}{c^{m}} \\\\
&= \\left( \\sum_{i=1}^{c^m} p_i^2 \\right) - \\dfrac{1}{c^{m}}.
\\end{aligned}
```
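
A short numeric check of the identity above (a hypothetical snippet, not part of the docstring), using a toy distribution over `c^m = 4` patterns:

```julia
# Hypothetical numeric check, not part of this PR: both forms of H_rde agree.
p = [0.5, 0.3, 0.2, 0.0]           # toy distribution over c^m = 4 patterns
lhs = sum(pᵢ -> (pᵢ - 1/4)^2, p)   # definition: squared deviations from 1/c^m
rhs = sum(abs2, p) - 1/4           # compact form with the constant outside the sum
lhs ≈ rhs                          # true
```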
Member:
I'm not sure if there is any point in writing this huge block of text. Why don't we just type the correct formula in the Description section and be done with it? Furthermore, do we even need to discuss this at all? I don't think it matters for the software that this equation holds, and here we are documenting the software. But in any case, I'd say just add the correct formula after a second equality in the equation in the Description if you want to keep it.

Member Author (@kahaaga, Sep 23, 2022):

I don't think it matters for the software that this equation holds, and here we are documenting the software. But in any case, I'd say just add the correct formula after a second equality in the equation in the Description if you want to keep it.

I think it does matter for the software, because the ambiguous notation caused me to spend a lot of extra time during development and testing figuring out (without initially realizing it was even a potential issue) whether there was a typo in their definition, or whether they had made a computational mistake. That could matter to anyone who wants to use the formula to test something by hand.

But in any case, I'd say just add the correct formula after a second equality in the equation in the Description if you want to keep it.

That's a good compromise.


[^Li2019]: Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new
complexity measure for sensor signal. Sensors, 19(23), 5203.
"""
function reverse_dispersion(x::AbstractVector{T}; s = GaussianSymbolization(5),
        m = 2, τ = 1, normalize = true) where T <: Real
    est = Dispersion(τ = τ, m = m, s = s)
    p = probabilities(x, est)

    # The following step combines distance information with the probabilities, so
    # from here on, it is not possible to use `entropy_renyi` or similar methods, because
    # we're not dealing with probabilities anymore.
    return distance_to_whitenoise(p, s.c, m; normalize = normalize)
end
18 changes: 16 additions & 2 deletions src/entropies/convenience_definitions.jl
@@ -53,6 +53,20 @@ function entropy_wavelet(x; wavelet = Wavelets.WT.Daubechies{12}(), base = MathC
entropy_renyi(x, est; base, q = 1)
end

function entropy_dispersion(args...)
"""
entropy_dispersion(x; m = 2, τ = 1, s = GaussianSymbolization(3),
base = MathConstants.e)
Comment on lines +57 to +58
Member:
In reverse_dispersion arguments are positional, here they are keyword. I'd say we stick with keyword.

Member Author:
In reverse_dispersion arguments are positional, here they are keyword. I'd say we stick with keyword.

Yes, definitely. As I commented above, I unintentionally missed the semi-colon.


Compute the dispersion entropy. This function is just a convenience call to:
```julia
est = Dispersion(m = m, τ = τ, s = s)
entropy_shannon(x, est; base)
```
See [`Dispersion`](@ref) for more info.
"""
function entropy_dispersion(x; m = 2, τ = 1, s = GaussianSymbolization(3),
        base = MathConstants.e)
    est = Dispersion(m = m, τ = τ, s = s)
    entropy_renyi(x, est; base, q = 1)
end
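
A hypothetical usage sketch (not part of this diff, and assuming the keyword defaults documented above): the convenience function should agree with calling the `Dispersion` estimator through `entropy_renyi` directly.

```julia
# Hypothetical usage sketch, not part of this PR.
using Entropies

x = rand(500)
h1 = entropy_dispersion(x; base = MathConstants.e)
h2 = entropy_renyi(x, Dispersion(m = 2, τ = 1, s = GaussianSymbolization(3));
                   base = MathConstants.e, q = 1)

h1 ≈ h2  # expected to hold
```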
61 changes: 0 additions & 61 deletions src/entropies/direct_entropies/entropy_dispersion.jl

This file was deleted.

1 change: 0 additions & 1 deletion src/entropies/entropies.jl
@@ -3,6 +3,5 @@ include("tsallis.jl")
include("shannon.jl")
include("convenience_definitions.jl")
include("direct_entropies/nearest_neighbors/nearest_neighbors.jl")
include("direct_entropies/entropy_dispersion.jl")

# TODO: What else is included here from direct entropies?