diff --git a/docs/src/complexity_measures.md b/docs/src/complexity_measures.md index 8cfe618d5..e0fa6032e 100644 --- a/docs/src/complexity_measures.md +++ b/docs/src/complexity_measures.md @@ -1,7 +1,14 @@ -# Complexity measures +# [Complexity measures](@id complexity_measures) ## Sample entropy ## Approximate entropy +## Reverse dispersion entropy + +```@docs +reverse_dispersion +distance_to_whitenoise +``` + ## Disequilibrium diff --git a/docs/src/entropies.md b/docs/src/entropies.md index c643a8b87..3d72bfcb6 100644 --- a/docs/src/entropies.md +++ b/docs/src/entropies.md @@ -13,10 +13,21 @@ entropy_tsallis ``` ## Shannon entropy (convenience) + ```@docs entropy_shannon ``` +## Normalization + +The generic [`entropy_normalized`](@ref) normalizes any entropy value to the entropy of a +uniform distribution. We also provide [maximum entropy](@ref maximum_entropy) functions +that are useful for manual normalization. + +```@docs +entropy_normalized +``` + ## Indirect entropies Here we list functions which compute Shannon entropies via alternate means, without explicitly computing some probability distributions and then using the Shannon formulat. @@ -33,4 +44,4 @@ entropy_permutation entropy_spatial_permutation entropy_wavelet entropy_dispersion -``` \ No newline at end of file +``` diff --git a/docs/src/examples.md b/docs/src/examples.md index b60412367..acae371f7 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -145,3 +145,60 @@ for a in (ax, ay, az); axislegend(a); end for a in (ax, ay); hidexdecorations!(a; grid=false); end fig ``` + +## [Dispersion and reverse dispersion entropy](@id dispersion_examples) + +Here we reproduce parts of figure 3 in Li et al. (2019), computing reverse and regular dispersion entropy for a time series consisting of normally distributed noise with a single spike in the middle of the signal. We compute the entropies over a range subsets of the data, using a sliding window consisting of 70 data points, stepping the window 10 time steps at a time. + +Note: the results here are not exactly the same as in the original paper, because Li et +al. (2019) base their examples on randomly generated numbers and do not provide code that +specify random number seeds. + +```@example +using Entropies, DynamicalSystems, Random, CairoMakie, Distributions + +n = 1000 +ts = 1:n +x = [i == n ÷ 2 ? 50.0 : 0.0 for i in ts] +rng = Random.default_rng() +s = rand(rng, Normal(0, 1), n) +y = x .+ s + +ws = 70 +windows = [t:t+ws for t in 1:10:n-ws] +rdes = zeros(length(windows)) +des = zeros(length(windows)) +pes = zeros(length(windows)) + +m, c = 2, 6 +est_de = Dispersion(symbolization = GaussianSymbolization(c), m = m, τ = 1) + +for (i, window) in enumerate(windows) + rdes[i] = reverse_dispersion(y[window], est_de; normalize = true) + des[i] = entropy_renyi_norm(y[window], est_de) +end + +fig = Figure() + +a1 = Axis(fig[1,1]; xlabel = "Time step", ylabel = "Value") +lines!(a1, ts, y) +display(fig) + +a2 = Axis(fig[2, 1]; xlabel = "Time step", ylabel = "Value") +p_rde = scatterlines!([first(w) for w in windows], rdes, + label = "Reverse dispersion entropy", + color = :black, + markercolor = :black, marker = '●') +p_de = scatterlines!([first(w) for w in windows], des, + label = "Dispersion entropy", + color = :red, + markercolor = :red, marker = 'x', markersize = 20) + +axislegend(position = :rc) +ylims!(0, max(maximum(pes), 1)) +fig +``` + +[^Rostaghi2016]: Rostaghi, M., & Azami, H. (2016). Dispersion entropy: A measure for time-series analysis. 
IEEE Signal Processing Letters, 23(5), 610-614. +[^Li2019]: Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new + complexity measure for sensor signal. Sensors, 19(23), 5203. \ No newline at end of file diff --git a/docs/src/index.md b/docs/src/index.md index 3609dc576..8d6c6a080 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -42,7 +42,7 @@ Thus, any of the implemented [probabilities estimators](@ref estimators) can be ### Complexity measures -Other complexity measures, which strictly speaking don't compute entropies, and may or may not explicitly compute probability distributions, appear in the [Complexity measures](@ref) section. +Other complexity measures, which strictly speaking don't compute entropies, and may or may not explicitly compute probability distributions, appear in the [Complexity measures](@ref complexity_measures) section. ## Input data diff --git a/docs/src/probabilities.md b/docs/src/probabilities.md index 42e5c5177..d2b34e9ec 100644 --- a/docs/src/probabilities.md +++ b/docs/src/probabilities.md @@ -24,6 +24,12 @@ SymbolicPermutation SpatialSymbolicPermutation ``` +## Dispersion (symbolic) + +```@docs +Dispersion +``` + ## Visitation frequency (binning) ```@docs diff --git a/docs/src/utils.md b/docs/src/utils.md index d79865b8d..a48ef3dc7 100644 --- a/docs/src/utils.md +++ b/docs/src/utils.md @@ -26,3 +26,17 @@ OrdinalPattern ```@docs Entropies.encode_motif ``` + +### Normalization + +```@docs +alphabet_length +``` + +#### [Maximum entropy](@id maximum_entropy) + +```@docs +maxentropy_tsallis +maxentropy_renyi +maxentropy_shannon +``` diff --git a/src/Entropies.jl b/src/Entropies.jl index a6af61930..21f237df4 100644 --- a/src/Entropies.jl +++ b/src/Entropies.jl @@ -16,6 +16,8 @@ include("symbolization/symbolize.jl") include("probabilities.jl") include("probabilities_estimators/probabilities_estimators.jl") include("entropies/entropies.jl") +include("complexity_measures/complexity_measures.jl") + include("deprecations.jl") diff --git a/src/complexity_measures/complexity_measures.jl b/src/complexity_measures/complexity_measures.jl new file mode 100644 index 000000000..a3719a54e --- /dev/null +++ b/src/complexity_measures/complexity_measures.jl @@ -0,0 +1 @@ +include("reverse_dispersion_entropy.jl") diff --git a/src/complexity_measures/reverse_dispersion_entropy.jl b/src/complexity_measures/reverse_dispersion_entropy.jl new file mode 100644 index 000000000..7b66b39ae --- /dev/null +++ b/src/complexity_measures/reverse_dispersion_entropy.jl @@ -0,0 +1,76 @@ +export reverse_dispersion +export distance_to_whitenoise + +# Note: this is not an entropy estimator, so we don't use the entropy_xxx_norm interface +# for normalization, even though we rely on `alphabet_length`. +""" + distance_to_whitenoise(p::Probabilities, estimator::Dispersion; normalize = false) + +Compute the distance of the probability distribution `p` from a uniform distribution, +given the parameters of `estimator` (which must be known beforehand). + +If `normalize == true`, then normalize the value to the interval `[0, 1]` by using the +parameters of `estimator`. + +Used to compute reverse dispersion entropy([`reverse_dispersion`](@ref); +Li et al., 2019[^Li2019]). + +[^Li2019]: Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new + complexity measure for sensor signal. Sensors, 19(23), 5203. 
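+
+## Example
+
+A minimal sketch of intended usage; the parameter values below are purely illustrative:
+
+```julia
+using Entropies
+x = rand(1000)
+est = Dispersion(m = 2, symbolization = GaussianSymbolization(c = 3))
+p = probabilities(x, est)
+distance_to_whitenoise(p, est; normalize = true)
+```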
+""" +function distance_to_whitenoise(p::Probabilities, est::Dispersion; normalize = false) + # We can safely skip non-occurring symbols, because they don't contribute + # to the sum in eq. 3 in Li et al. (2019) + Hrde = sum(abs2, p) - (1 / alphabet_length(est)) + + if normalize + return Hrde / (1 - (1 / alphabet_length(est))) + else + return Hrde + end +end + +# Note again: this is a *complexity measure*, not an entropy estimator, so we don't use +# the entropy_xxx_norm interface for normalization, even though we rely on `alphabet_length`. +""" + reverse_dispersion(x::AbstractVector{T}, est::Dispersion = Dispersion(); + normalize = true) where T <: Real + +Compute the reverse dispersion entropy complexity measure (Li et al., 2019)[^Li2019]. + +## Description + +Li et al. (2021)[^Li2019] defines the reverse dispersion entropy as + +```math +H_{rde} = \\sum_{i = 1}^{c^m} \\left(p_i - \\dfrac{1}{{c^m}} \\right)^2 = +\\left( \\sum_{i=1}^{c^m} p_i^2 \\right) - \\dfrac{1}{c^{m}} +``` +where the probabilities ``p_i`` are obtained precisely as for the [`Dispersion`](@ref) +probability estimator. Relative frequencies of dispersion patterns are computed using the +given `symbolization` scheme , which defaults to symbolization using the normal cumulative +distribution function (NCDF), as implemented by [`GaussianSymbolization`](@ref), using +embedding dimension `m` and embedding delay `τ`. +Recommended parameter values[^Li2018] are `m ∈ [2, 3]`, `τ = 1` for the embedding, and +`c ∈ [3, 4, …, 8]` categories for the Gaussian mapping. + +If `normalize == true`, then the reverse dispersion entropy is normalized to `[0, 1]`. + +The minimum value of ``H_{rde}`` is zero and occurs precisely when the dispersion +pattern distribution is flat, which occurs when all ``p_i``s are equal to ``1/c^m``. +Because ``H_{rde} \\geq 0``, ``H_{rde}`` can therefore be said to be a measure of how far +the dispersion pattern probability distribution is from white noise. + +[^Li2019]: Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new + complexity measure for sensor signal. Sensors, 19(23), 5203. +""" +function reverse_dispersion(x::AbstractVector{T}, est::Dispersion = Dispersion(); + normalize = true) where T <: Real + + p = probabilities(x, est) + + # The following step combines distance information with the probabilities, so + # from here on, it is not possible to use `renyi_entropy` or similar methods, because + # we're not dealing with probabilities anymore. + Hrde = distance_to_whitenoise(p, est, normalize = normalize) +end diff --git a/src/entropies/convenience_definitions.jl b/src/entropies/convenience_definitions.jl index e081e213c..3ef5c647b 100644 --- a/src/entropies/convenience_definitions.jl +++ b/src/entropies/convenience_definitions.jl @@ -53,6 +53,20 @@ function entropy_wavelet(x; wavelet = Wavelets.WT.Daubechies{12}(), base = MathC entropy_renyi(x, est; base, q = 1) end -function entropy_dispersion(args...) +""" + entropy_dispersion(x; m = 2, τ = 1, s = GaussianSymbolization(3), + base = MathConstants.e) + +Compute the dispersion entropy. This function is just a convenience call to: +```julia +est = Dispersion(m = m, τ = τ, s = s) +entropy_shannon(x, est; base) +``` +See [`Dispersion`](@ref) for more info. 
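+
+For example, using the expanded form above with illustrative parameter values:
+
+```julia
+using Entropies
+x = rand(1000)
+est = Dispersion(m = 2, τ = 1, symbolization = GaussianSymbolization(c = 3))
+entropy_shannon(x, est; base = 2)
+```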
+""" +function entropy_dispersion(x; wavelet = Wavelets.WT.Daubechies{12}(), + base = MathConstants.e) -end \ No newline at end of file + est = Dispersion(m = m, τ = τ, s = s) + entropy_renyi(x, est; base, q = 1) +end diff --git a/src/entropies/direct_entropies/entropy_dispersion.jl b/src/entropies/direct_entropies/entropy_dispersion.jl deleted file mode 100644 index 9a1ca301d..000000000 --- a/src/entropies/direct_entropies/entropy_dispersion.jl +++ /dev/null @@ -1,61 +0,0 @@ -using DelayEmbeddings -using StatsBase - -export entropy_dispersion - -""" - embed_symbols(s::AbstractVector{T}, m, τ) {where T} → Dataset{m, T} - -From the symbols `sᵢ ∈ s`, create the embedding vectors (with dimension `m` and lag `τ`): - -```math -s_i^D = \\{s_i, s_{i+\\tau}, \\ldots, s_{i+(m-1)\\tau} \\} -```, - -where ``i = 1, 2, \\ldots, N - (m - 1)\\tau`` and `N = length(s)`. -""" -function embed_symbols(symbols::AbstractVector, m, τ) - return embed(symbols, m, τ) -end - - -function dispersion_histogram(x::AbstractDataset, N, m, τ) - return _non0hist(x.data, (N - (m - 1)*τ)) -end - - -""" - entropy_dispersion(x, s = GaussianSymbolization(n_categories = 5); - m = 3, τ = 1, q = 1, base = MathConstants.e) - -Compute the (order-`q` generalized) dispersion entropy to the given `base` of the -univariate time series `x`. Relative frequencies of dispersion patterns are computed using -the symbolization scheme `s` with embedding dimension `m` and embedding delay `τ`. - -Recommended parameter values[^Li2018] are `m ∈ [2, 3]`, `τ = 1`, and -`n_categories ∈ [3, 4, …, 8]` for the Gaussian mapping (defaults to 5). - -## Description - -Dispersion entropy characterizes the complexity and irregularity of a time series. -This implementation follows the description in Li et al. (2018)[^Li2018], which is -based on Azami & Escudero (2018)[^Azami2018], additionally allowing the computation of -generalized dispersion entropy of order `q` (default is `q = 1`, which is the Shannon entropy). - -## Data requirements - -Li et al. (2018) recommends that `x` has at least 1000 data points. - -[^Li2018]: Li, G., Guan, Q., & Yang, H. (2018). Noise reduction method of underwater acoustic signals based on CEEMDAN, effort-to-compress complexity, refined composite multiscale dispersion entropy and wavelet threshold denoising. Entropy, 21(1), 11. -[^Azami2018]: Azami, H., & Escudero, J. (2018). Coarse-graining approaches in univariate multiscale sample and dispersion entropy. Entropy, 20(2), 138. -""" -function entropy_dispersion(x::AbstractVector, - s::GaussianSymbolization = GaussianSymbolization(n_categories = 5); - m = 2, τ = 1, q = 1, base = MathConstants.e) - symbols = symbolize(x, s) - dispersion_patterns = embed(symbols, m, τ) - N = length(x) - - hist = dispersion_histogram(dispersion_patterns, N, m, τ) - entropy_renyi(hist, q = q, base = base) -end diff --git a/src/entropies/entropies.jl b/src/entropies/entropies.jl index 8aa8f0a26..94858b26a 100644 --- a/src/entropies/entropies.jl +++ b/src/entropies/entropies.jl @@ -3,6 +3,6 @@ include("tsallis.jl") include("shannon.jl") include("convenience_definitions.jl") include("direct_entropies/nearest_neighbors/nearest_neighbors.jl") -include("direct_entropies/entropy_dispersion.jl") - # TODO: What else is included here from direct entropies? 
+ +include("normalized_entropy.jl") diff --git a/src/entropies/normalized_entropy.jl b/src/entropies/normalized_entropy.jl new file mode 100644 index 000000000..848c1c365 --- /dev/null +++ b/src/entropies/normalized_entropy.jl @@ -0,0 +1,80 @@ +export entropy_normalized + +""" + entropy_normalized(f::Function, x, est::ProbabilitiesEstimator, args...; kwargs...) + +Convenience syntax for normalizing to the entropy of uniform probability distribution. +First estimates probabilities as `p::Probabilities = f(x, est, args...; kwargs...`), +then calls `entropy_normalized(f, p, args...; kwargs...)`. + +Normalization is only defined for estimators for which [`alphabet_length`](@ref) is defined, +meaning that the total number of states or symbols is known beforehand. + + entropy_normalized(f::Function, p::Probabilities, est::ProbabilitiesEstimator, args...; + kwargs...) + +Normalize the entropy, as returned by the entropy function `f` called with the given +arguments (i.e. `f(p, args...; kwargs...)`), to the entropy of a uniform distribution, +inferring [`alphabet_length`](@ref) from `est`. + + entropy_normalized(f::Function, p::Probabilities, args...; kwargs...) + +The same as above, but infers alphabet length from counting how many elements +are in `p` (zero probabilities are counted). + +## Examples + +Computing normalized entropy from scratch: + +```julia +x = rand(100) +entropy_normalized(entropy_renyi, x, Dispersion()) +``` + +Computing normalized entropy from pre-computed probabilities with known parameters: + +```julia +x = rand(100) +est = Dispersion(m = 3, symbolization = GaussianSymbolization(c = 4)) +p = probabilities(x, est) +entropy_normalized(entropy_renyi, p, est) +``` + +Computing normalized entropy, assumming there are `N = 10` total states: + +```julia +N = 10 +p = Probabilities(rand(10)) +entropy_normalized(entropy_renyi, p, est) +``` + +!!! note "Normalized output range" + For Rényi entropy (e.g. Kumar et al., 1986), and for Tsallis entropy (Tsallis, 1998), + normalizing to the uniform distribution ensures that the entropy lies in + the interval `[0, 1]`. For other entropies and parameter choices, the resulting entropy + is not guaranteed to lie in `[0, 1]`. It is up to the user to decide whether + normalizing to a uniform distribution makes sense for their use case. + + +[^Kumar1986]: Kumar, U., Kumar, V., & Kapur, J. N. (1986). Normalized measures of entropy. + International Journal Of General System, 12(1), 55-69. +[^Tsallis1998]: Tsallis, C. (1988). Possible generalization of Boltzmann-Gibbs statistics. + Journal of statistical physics, 52(1), 479-487. +""" +function entropy_normalized(f::Function, x, est::ProbEst, args...; kwargs...) + N = alphabet_length(est) + p_uniform = Probabilities(repeat([1/N], N)) + return f(x, est, args...; kwargs...) / f(p_uniform, args...; kwargs...) +end + +function entropy_normalized(f::Function, x::Probabilities, est::ProbEst, args...; kwargs...) + N = alphabet_length(est) + p_uniform = Probabilities(repeat([1/N], N)) + return f(x, args...; kwargs...) / f(p_uniform; kwargs...) +end + +function entropy_normalized(f::Function, x::Probabilities, args...; kwargs...) + N = length(x) + p_uniform = Probabilities(repeat([1/N], N)) + return f(x, args...; kwargs...) / f(p_uniform; kwargs...) 
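+    # Note: in this method the alphabet length is inferred as `length(x)`, so any
+    # zero-probability states must be kept in the `Probabilities` object for the
+    # normalization to be meaningful.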
+end diff --git a/src/entropies/renyi.jl b/src/entropies/renyi.jl index a12491e5f..fda65b130 100644 --- a/src/entropies/renyi.jl +++ b/src/entropies/renyi.jl @@ -1,4 +1,5 @@ export entropy_renyi +export maxentropy_renyi """ entropy_renyi(p::Probabilities; q = 1.0, base = MathConstants.e) @@ -30,7 +31,11 @@ also known as Hartley entropy), or the correlation entropy [^Rényi1960]: A. Rényi, _Proceedings of the fourth Berkeley Symposium on Mathematics, Statistics and Probability_, pp 547 (1960) +[^Kumar1986]: Kumar, U., Kumar, V., & Kapur, J. N. (1986). Normalized measures of entropy. + International Journal Of General System, 12(1), 55-69. [^Shannon1948]: C. E. Shannon, Bell Systems Technical Journal **27**, pp 379 (1948) + +See also: [`maxentropy_renyi`](@ref). """ function entropy_renyi end @@ -89,3 +94,14 @@ function entropy_renyi!(p, x, est; q = 1.0, α = nothing, base = MathConstants.e probabilities!(p, x, est) entropy_renyi(p; q = q, base = base) end + +""" + maxentropy_renyi(N::Int, base = MathConstants.e) + +Convenience function that computes the maximum value of the order-`q` generalized +Rényi entropy for an `N`-element probability distribution, i.e. +``\\log_{base}(N)``, which is useful for normalization when `N` is known. + +See also [`entropy_renyi`](@ref), [`entropy_normalized`](@ref). +""" +maxentropy_renyi(N::Int; base = MathConstants.e) = log(base, N) diff --git a/src/entropies/shannon.jl b/src/entropies/shannon.jl index ee2a50350..a9604c567 100644 --- a/src/entropies/shannon.jl +++ b/src/entropies/shannon.jl @@ -1,11 +1,26 @@ export entropy_shannon +export maxentropy_shannon """ entropy_shannon(args...; base = MathConstants.e) -Equivalent to `entropy_renyi(args...; base, q = 1)` and provided solely for convenience. -Compute the Shannon entropy, given by + +Equivalent to `entropy_renyi(args...; base = base, q = 1)` and provided solely for convenience. +Computes the Shannon entropy, given by ```math H(p) = - \\sum_i p[i] \\log(p[i]) ``` + +See also: [`maxentropy_shannon`](@ref). +""" +entropy_shannon(args...; base = MathConstants.e) = + entropy_renyi(args...; base = base, q = 1) + +""" + maxentropy_shannon(N::Int, q; base = MathConstants.e) + +Convenience function that computes the maximum value of the Shannon entropy, i.e. +``log_{base}(N)``, which is useful for normalization when `N` is known. + +See also [`entropy_shannon`](@ref), [`entropy_normalized`](@ref). """ -entropy_shannon(args...; base = MathConstants.e) = entropy_renyi(args...; base, q = 1) +maxentropy_shannon(N::Int; base = MathConstants.e) = maxentropy_renyi(N, base = base) diff --git a/src/entropies/tsallis.jl b/src/entropies/tsallis.jl index 51c7765cd..cc6178528 100644 --- a/src/entropies/tsallis.jl +++ b/src/entropies/tsallis.jl @@ -1,20 +1,25 @@ export entropy_tsallis +export maxentropy_tsallis """ - entropy_tsallis(p::Probabilities; k = 1, q = 0) + entropy_tsallis(p::Probabilities; k = 1, q = 0, base = MathConstants.e) +Compute the Tsallis entropy of `p` (Tsallis, 1998)[^Tsallis1988]. -Compute the Tsallis entropy of `x` (Tsallis, 1998)[^Tsallis1988]. +`base` only applies in the limiting case `q == 1`, in which the Tsallis entropy reduces +to Shannon entropy. 
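+
+In that limit (with `k = 1`),
+
+```math
+\\lim_{q \\to 1} S_q(p) = -\\sum_{i} p[i] \\log(p[i]),
+```
+
+where the logarithm is taken to the given `base`.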
- entropy_tsallis(x::Array_or_Dataset, est; k = 1, q = 0) + entropy_tsallis(x::Array_or_Dataset, est; k = 1, q = 0, base = MathConstants.e) A convenience syntax, which calls first `probabilities(x, est)` -and then calculates the entropy of the result (and thus `est` can be anything +and then calculates the Tsallis entropy of the result (and thus `est` can be anything the [`probabilities`](@ref) function accepts). ## Description -The Tsallis entropy is a generalization of the Boltzmann–Gibbs entropy, + +The Tsallis entropy is a generalization of the Boltzmann-Gibbs entropy, with `k` standing for the Boltzmann constant. It is defined as + ```math S_q(p) = \\frac{k}{q - 1}\\left(1 - \\sum_{i} p[i]^q\\right) ``` @@ -23,14 +28,40 @@ S_q(p) = \\frac{k}{q - 1}\\left(1 - \\sum_{i} p[i]^q\\right) Tsallis, C. (1988). Possible generalization of Boltzmann-Gibbs statistics. Journal of statistical physics, 52(1), 479-487. """ -function entropy_tsallis(prob::Probabilities; k = 1, q = 0) +function entropy_tsallis(prob::Probabilities; k = 1, q = 0, base = MathConstants.e) # As for entropy_renyi, we copy because if someone initialized Probabilities with 0s, # they'd probably want to index the zeros as well. probs = Iterators.filter(!iszero, prob.p) - return k/(q-1)*(1 - sum(p^q for p in probs)) + if q ≈ 1 + return -sum(p * log(base, p) for p in probs) # Shannon entropy + else + return k/(q-1)*(1 - sum(p^q for p in probs)) + end end -function entropy_tsallis(x, est; kwargs...) +function entropy_tsallis(x::Array_or_Dataset, est; kwargs...) p = probabilities(x, est) entropy_tsallis(p; kwargs...) -end \ No newline at end of file +end + + +""" + maxentropy_tsallis(N::Int, q; k = 1, base = MathConstants.e) + +Convenience function that computes the maximum value of the generalized Tsallis +entropy with parameters `q` and `k` for an `N`-element probability distribution, i.e. +``\\dfrac{N^{1 - q} - 1}{(1 - q)}``, which is useful for normalization when `N` and `q` +is known. + +If `q == 1`, then `log(base, N)` is returned. + +See also [`entropy_tsallis`](@ref), [`entropy_normalized`](@ref), +[`maxentropy_tsallis`](@ref). +""" +function maxentropy_tsallis(N::Int, q; k = 1, base = MathConstants.e) + if q ≈ 1.0 + return log(base, N) + else + return k*(N^(1 - q) - 1) / (1 - q) + end +end diff --git a/src/probabilities_estimators/dispersion/dispersion.jl b/src/probabilities_estimators/dispersion/dispersion.jl new file mode 100644 index 000000000..45d652584 --- /dev/null +++ b/src/probabilities_estimators/dispersion/dispersion.jl @@ -0,0 +1,126 @@ +using DelayEmbeddings +using StatsBase + +export Dispersion + +""" + Dispersion(; symbolization = GaussianSymbolization(c = 5), m = 2, τ = 1, + check_unique = true) + +A probability estimator based on dispersion patterns, originally used by +Rostaghi & Azami, 2016[^Rostaghi2016] to compute the "dispersion entropy", which +characterizes the complexity and irregularity of a time series. + +Relative frequencies of dispersion patterns are computed using the symbolization scheme +`s` with embedding dimension `m` and embedding delay `τ`. Recommended parameter +values[^Li2018] are `m ∈ [2, 3]`, `τ = 1` for the embedding, and `c ∈ [3, 4, …, 8]` +categories for the Gaussian symbol mapping. + +## Description + +Assume we have a univariate time series ``X = \\{x_i\\}_{i=1}^N``. First, this time series +is symbolized using `symbolization`, which default to [`GaussianSymbolization`](@ref), +which uses the normal cumulative distribution function (CDF) for symbolization. 
+Other choices of CDFs are also possible, but Entropies.jl currently only implements +[`GaussianSymbolization`](@ref), which was used in Rostaghi & Azami (2016). This step +results in an integer-valued symbol time series ``S = \\{ s_i \\}_{i=1}^N``, where +``s_i \\in [1, 2, \\ldots, c]``. + +Next, the symbol time series ``S`` is embedded into an +``m``-dimensional time series, using an embedding lag of ``\\tau = 1``, which yields a +total of ``N - (m - 1)\\tau`` points, or "dispersion patterns". Because each ``z_i`` can +take on ``c`` different values, and each embedding point has ``m`` values, there +are ``c^m`` possible dispersion patterns. This number is used for normalization when +computing dispersion entropy. + +### Computing dispersion probabilities and entropy + +A probability distribution ``P = \\{p_i \\}_{i=1}^{c^m}``, where +``\\sum_i^{c^m} p_i = 1``, can then be estimated by counting and sum-normalising +the distribution of dispersion patterns among the embedding vectors. +Note that dispersion patterns that are not present are not counted. Therefore, you'll +always get non-zero probabilities using the `Dispersion` probability estimator. + +To compute dispersion entropy of order `q` to a given `base` on the +univariate input time series `x`, do: + +```julia +entropy_renyi(x, Dispersion(), base = 2, q = 1) +``` + +## Data requirements and parameters + +The input must have more than one unique element for the Gaussian mapping to be +well-defined. Li et al. (2018) recommends that `x` has at least 1000 data points. + +If `check_unique == true` (default), then it is checked that the input has +more than one unique value. If `check_unique == false` and the input only has one +unique element, then a `InexactError` is thrown when trying to compute probabilities. + +See also: [`entropy_dispersion`](@ref), [`GaussianSymbolization`](@ref). + +!!! note "Why 'dispersion patterns'?" + Each embedding vector is called a "dispersion pattern". Why? Let's consider the case + when ``m = 5`` and ``c = 3``, and use some very imprecise terminology for illustration: + + When ``c = 3``, values clustering far below mean are in one group, values clustered + around the mean are in one group, and values clustering far above the mean are in a + third group. Then the embedding vector ``[2, 2, 2, 2, 2]`` consists of values that are + relatively close together (close to the mean), so it represents a set of numbers that + are not very spread out (less dispersed). The embedding vector ``[1, 1, 2, 3, 3]``, + however, represents numbers that are much more spread out (more dispersed), because the + categories representing "outliers" both above and below the mean are represented, + not only values close to the mean. + +[^Rostaghi2016]: Rostaghi, M., & Azami, H. (2016). Dispersion entropy: A measure for time-series analysis. IEEE Signal Processing Letters, 23(5), 610-614. +[^Li2018]: Li, G., Guan, Q., & Yang, H. (2018). Noise reduction method of underwater acoustic signals based on CEEMDAN, effort-to-compress complexity, refined composite multiscale dispersion entropy and wavelet threshold denoising. Entropy, 21(1), 11. 
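+
+## Example
+
+A minimal sketch of usage; the parameter values below are illustrative only:
+
+```julia
+using Entropies
+x = rand(1000)
+est = Dispersion(m = 2, τ = 1, symbolization = GaussianSymbolization(c = 3))
+p = probabilities(x, est)                      # dispersion-pattern probabilities
+h = entropy_normalized(entropy_renyi, x, est)  # normalized dispersion entropy
+```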
+""" +Base.@kwdef struct Dispersion{S <: SymbolizationScheme} <: ProbabilitiesEstimator + symbolization::S = GaussianSymbolization(c = 5) + m::Int = 2 + τ::Int = 1 + check_unique::Bool = false +end + +export entropy_dispersion + +""" + embed_symbols(symbols::AbstractVector{T}, m, τ) {where T} → Dataset{m, T} + +From the symbols `sᵢ ∈ s`, create the embedding vectors (with dimension `m` and lag `τ`): + +```math +s_i^D = \\{s_i, s_{i+\\tau}, \\ldots, s_{i+(m-1)\\tau} \\} +```, + +where ``i = 1, 2, \\ldots, N - (m - 1)\\tau`` and `N = length(s)`. +""" +function embed_symbols(symbols::AbstractVector, m, τ) + return embed(symbols, m, τ) +end + +function dispersion_histogram(x::AbstractDataset, N, m, τ) + return _non0hist(x.data, (N - (m - 1)*τ)) +end + +function probabilities(x::AbstractVector, est::Dispersion) + if est.check_unique + if length(unique(x)) == 1 + symbols = repeat([1], length(x)) + else + symbols = symbolize(x, est.symbolization) + end + else + symbols = symbolize(x, est.symbolization) + end + N = length(x) + + # We must use genembed, not embed, to make sure the zero lag is included + m, τ = est.m, est.τ + τs = tuple((x for x in 0:-τ:-(m-1)*τ)...) + dispersion_patterns = genembed(symbols, τs, ones(m)) + hist = dispersion_histogram(dispersion_patterns, N, est.m, est.τ) + p = Probabilities(hist) +end + +alphabet_length(est::Dispersion)::Int = est.symbolization.c ^ est.m diff --git a/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl index fc5987c3e..9bee968fa 100644 --- a/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl +++ b/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl @@ -45,3 +45,5 @@ function probabilities(x::AbstractVector{T}, est::SymbolicAmplitudeAwarePermutat p = symprobs(πs, wts, normalize = true) Probabilities(p) end + +alphabet_length(est::SymbolicAmplitudeAwarePermutation)::Int = factorial(est.m) diff --git a/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl index b77956d46..9caa7f21e 100644 --- a/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl +++ b/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl @@ -285,3 +285,5 @@ function entropy_renyi!( ps = probabilities!(s, x, est) entropy_renyi(ps, α = α, base = base) end + +alphabet_length(est::SymbolicPermutation)::Int = factorial(est.m) diff --git a/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl index 4694fcee7..d7fcd4f73 100644 --- a/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl +++ b/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl @@ -39,3 +39,5 @@ function probabilities(x::AbstractVector{T}, est::SymbolicWeightedPermutation) w Probabilities(symprobs(πs, wts, normalize = true)) end + +alphabet_length(est::SymbolicWeightedPermutation)::Int = factorial(est.m) diff --git a/src/probabilities_estimators/probabilities_estimators.jl b/src/probabilities_estimators/probabilities_estimators.jl index ba54fda28..4f3cc8879 100644 --- a/src/probabilities_estimators/probabilities_estimators.jl +++ b/src/probabilities_estimators/probabilities_estimators.jl @@ -3,4 +3,7 @@ include("rectangular_binning/rectangular_binning.jl") include("permutation_ordinal/symbolic.jl") 
include("kernel_density.jl") include("timescales/timescales.jl") -include("transfer_operator/transfer_operator.jl") \ No newline at end of file +include("transfer_operator/transfer_operator.jl") +include("dispersion/dispersion.jl") + +include("utils.jl") diff --git a/src/probabilities_estimators/utils.jl b/src/probabilities_estimators/utils.jl new file mode 100644 index 000000000..b0927a2d5 --- /dev/null +++ b/src/probabilities_estimators/utils.jl @@ -0,0 +1,21 @@ +export alphabet_length + +""" + alphabet_length(estimator) → Int + +Returns the total number of possible symbols/states implied by `estimator`. +If the total number of states cannot be known a priori, an error is thrown. +Primarily used for normalization of entropy values computed using symbolic estimators. + +## Examples + +```jldoctest setup = :(using Entropies) +julia> est = SymbolicPermutation(m = 4) +SymbolicPermutation{typeof(Entropies.isless_rand)}(1, 4, Entropies.isless_rand) + +julia> alphabet_length(est) +24 +``` +""" +alphabet_length(est) = + throw(error("alphabet_length not implemented for estimator of type $(typeof(est))")) diff --git a/src/symbolization/GaussianSymbolization.jl b/src/symbolization/GaussianSymbolization.jl index 0d426fa44..6ceb4d11b 100644 --- a/src/symbolization/GaussianSymbolization.jl +++ b/src/symbolization/GaussianSymbolization.jl @@ -3,17 +3,26 @@ using Statistics, QuadGK export GaussianSymbolization """ - GaussianSymbolization(; n_categories::Int = 5) + GaussianSymbolization(; c::Int = 3) -A symbolization scheme where the elements of `x`, which is the time series -``x = \\{x_j, j = 1, 2, \\ldots, N \\}``, is to a new time series -``y = \\{y_j, j = 1, 2, \\ldots, N \\}`` where ``y_j \\in (0, 1)`` and +A symbolization scheme where the elements of `x` are symbolized into `c` distinct integer +categories using the normal cumulative distribution function (NCDF). -```math -y_j = \\dfrac{1}{\\sigma\\sqrt{2\\pi}} \\int_{x = -\\infty}^{x_j} \\exp{\\dfrac{-{(x-\\mu)}^2}{2{\\sigma}^2}} dx, -``` +## Algorithm + +Assume we have a univariate time series ``X = \\{x_i\\}_{i=1}^N``. `GaussianSymbolization` +first maps each ``x_i`` to a new real number ``y_i \\in [0, 1]`` by using the normal +cumulative distribution function (CDF), ``x_i \\to y_i : y_i = \\dfrac{1}{ \\sigma + \\sqrt{2 \\pi}} \\int_{-\\infty}^{x_i} e^{(-(x_i - \\mu)^2)/(2 \\sigma^2)} dx``, +where ``\\mu`` and ``\\sigma`` are the empirical mean and standard deviation of ``X``. -where ``\\mu`` and ``\\sigma`` are the mean and standard deviations of `x`. +Next, each ``y_i`` is linearly mapped to an integer +``z_i \\in [1, 2, \\ldots, c]`` using the map +``y_i \\to z_i : z_i = R(y_i(c-1) + 0.5)``, where ``R`` indicates rounding up to the +nearest integer. This procedure subdivides the interval ``[0, 1]`` into ``c`` +different subintervals that form a covering of ``[0, 1]``, and assigns each ``y_i`` to one +of these subintervals. The original time series ``X`` is thus transformed to a symbol time +series ``S = \\{ s_i \\}_{i=1}^N``, where ``s_i \\in [1, 2, \\ldots, c]``. # Usage @@ -27,7 +36,7 @@ scheme `s`. ```jldoctest; setup = :(using Entropies) julia> x = [0.1, 0.4, 0.7, -2.1, 8.0, 0.9, -5.2]; -julia> Entropies.symbolize(x, GaussianSymbolization(5)) +julia> Entropies.symbolize(x, GaussianSymbolization(c = 5)) 7-element Vector{Int64}: 3 3 @@ -40,19 +49,19 @@ julia> Entropies.symbolize(x, GaussianSymbolization(5)) See also: [`symbolize`](@ref). 
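+
+For instance, with `c = 3` categories, the mapping ``y_i \\to z_i`` gives
+``y_i = 0.1 \\to \\lceil 0.1(3 - 1) + 0.5 \\rceil = 1``,
+``y_i = 0.7 \\to \\lceil 1.9 \\rceil = 2`` and ``y_i = 0.95 \\to \\lceil 2.4 \\rceil = 3``.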
""" -Base.@kwdef struct GaussianSymbolization{I <: Integer} - n_categories::I +Base.@kwdef struct GaussianSymbolization{I <: Integer} <: SymbolizationScheme + c::I = 3 end g(xᵢ, μ, σ) = exp((-(xᵢ - μ)^2)/(2σ^2)) """ - map_to_category(yⱼ, n_categories) + map_to_category(yⱼ, c) -Map the value `yⱼ ∈ (0, 1)` to an integer `[1, 2, …, n_categories]`. +Map the value `yⱼ ∈ (0, 1)` to an integer `[1, 2, …, c]`. """ -function map_to_category(yⱼ, n_categories) - zⱼ = ceil(Int, yⱼ * (n_categories - 1) + 1/2) +function map_to_category(yⱼ, c) + zⱼ = ceil(Int, yⱼ * (c - 1) + 1/2) return zⱼ end @@ -65,5 +74,5 @@ function symbolize(x::AbstractVector, s::GaussianSymbolization) k = 1/(σ*sqrt(2π)) yⱼs = [k * first(quadgk(x -> g(x, μ, σ), -Inf, xᵢ)) for xᵢ in x] - return map_to_category.(yⱼs, s.n_categories) + return map_to_category.(yⱼs, s.c) end diff --git a/test/complexity_measures.jl b/test/complexity_measures.jl new file mode 100644 index 000000000..44b5e3e5c --- /dev/null +++ b/test/complexity_measures.jl @@ -0,0 +1,7 @@ +using Entropies, Test + +@testset begin "Reverse dispersion entropy" + x = rand(100) + @test reverse_dispersion(x) isa Real + @test 0.0 <= reverse_dispersion(x, normalize = true) <= 1.0 +end diff --git a/test/dispersion.jl b/test/dispersion.jl new file mode 100644 index 000000000..ed7f2d71d --- /dev/null +++ b/test/dispersion.jl @@ -0,0 +1,81 @@ +using Entropies, Test + +@testset "Dispersion methods" begin + x = rand(100) + + @testset "Dispersion" begin + + @testset "Internals" begin + # Li et al. (2018) recommends using at least 1000 data points when estimating + # dispersion entropy. + x = rand(1000) + c = 4 + m = 4 + τ = 1 + s = GaussianSymbolization(c = c) + + # Symbols should be in the set [1, 2, …, c]. + symbols = Entropies.symbolize(x, s) + @test all([s ∈ collect(1:c) for s in symbols]) + + # Dispersion patterns should have a normalized histogram that sums to 1.0. + dispersion_patterns = DelayEmbeddings.embed(symbols, m, τ) + hist = Entropies.dispersion_histogram(dispersion_patterns, length(x), m, τ) + @test sum(hist) ≈ 1.0 + end + + # Test case from Rostaghi & Azami (2016)'s dispersion entropy paper. The + # symbolization step is tested separately in the "Symbolization" test set. + # We here start from pre-computed symbols `s`. + x = [0.82,0.75,0.21,0.94,0.52,0.05,0.241,0.75,0.35,0.43,0.11,0.87] + m, n_classes = 2, 3 + est = Dispersion(m = m, symbolization = GaussianSymbolization(c = n_classes)) + + # Take only the non-zero probabilities from the paper (in `dispersion_histogram`, + # we don't count zero-probability bins, so eliminate zeros for comparison). + ps_paper = [1/11, 0/11, 3/11, 2/11, 1/11, 0/11, 1/11, 2/11, 1/11] |> sort + ps_paper = ps_paper[findall(ps_paper .> 0)] + + ps = probabilities(x, est) + @test ps |> sort == ps_paper + + # There is probably a typo in Rostaghi & Azami (2016). They state that the + # non-normalized dispersion entropy is 1.8642. However, with identical probabilies, + # we obtain non-normalized dispersion entropy of 1.8462. + res = entropy_renyi(x, est, base = MathConstants.e, q = 1) + @test round(res, digits = 4) == 1.8462 + + # Again, probabilities are identical up to this point, but the values we get differ + # slightly from the paper. They get normalized DE of 0.85, but we get 0.84. 0.85 is + # the normalized DE you'd get by manually normalizing the (erroneous) value from + # their previous step. 
+ res_norm = entropy_normalized(entropy_renyi, x, est, base = MathConstants.e, q = 1) + @test round(res_norm, digits = 2) == 0.84 + end + + @testset "Reverse dispersion entropy" begin + + @testset "Distance to whitenoise" begin + m, n_classes = 2, 2 + est = Dispersion(m = m, symbolization = GaussianSymbolization(c = n_classes)) + + # Reverse dispersion entropy is 0 when all probabilities are identical and equal + # to 1/(n_classes^m). + flat_dist = Probabilities(repeat([1/m^n_classes], m^n_classes)) + Hrde_minimal = distance_to_whitenoise(flat_dist, est, normalize = false) + @test round(Hrde_minimal, digits = 7) ≈ 0.0 + + # Reverse dispersion entropy is maximal when there is only one non-zero dispersal + # pattern. Then reverse dispersion entropy is + # 1 - 1/(n_classes^m). When normalizing to this value, the RDE should be 1.0. + m, n_classes = 2, 2 + single_element_dist = Probabilities([1.0, 0.0, 0.0, 0.0]) + Hrde_maximal = distance_to_whitenoise(single_element_dist, est, + normalize = false) + Hrde_maximal_norm = distance_to_whitenoise(single_element_dist, est, + normalize = true) + @test round(Hrde_maximal, digits = 7) ≈ 1 - 1/(n_classes^m) + @test round(Hrde_maximal_norm, digits = 7) ≈ 1.0 + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 5c8ab8585..f43235f40 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,8 +8,10 @@ using Neighborhood: KDTree, BruteForce # TODO: This is how the tests should look like in the end: defaultname(file) = splitext(basename(file))[1] testfile(file, testname=defaultname(file)) = @testset "$testname" begin; include(file); end -@testset "Entopies.jl" begin +@testset "Entropies.jl" begin testfile("timescales.jl") + testfile("dispersion.jl") + testfile("complexity_measures.jl") end @testset "Histogram estimation" begin @@ -127,14 +129,20 @@ end # Li et al. (2018) recommends using at least 1000 data points when estimating # dispersion entropy. x = rand(1000) - n_categories = 4 + c = 4 m = 4 τ = 1 - s = GaussianSymbolization(n_categories = n_categories) + s = GaussianSymbolization(c = c) - # Symbols should be in the set [1, 2, …, n_categories]. + # Symbols should be in the set [1, 2, …, c]. symbols = Entropies.symbolize(x, s) - @test all([s ∈ collect(1:n_categories) for s in symbols]) + @test all([s ∈ collect(1:c) for s in symbols]) + + # Test case from Rostaghi & Azami (2016)'s dispersion entropy paper. + y = [9.0, 8.0, 1.0, 12.0, 5.0, -3.0, 1.5, 8.01, 2.99, 4.0, -1.0, 10.0] + scheme = GaussianSymbolization(3) + s = symbolize(y, scheme) + @test s == [3, 3, 1, 3, 2, 1, 1, 3, 2, 2, 1, 3] end end @@ -325,32 +333,37 @@ end end end - @testset "Dispersion entropy" begin - # Li et al. (2018) recommends using at least 1000 data points when estimating - # dispersion entropy. - x = rand(1000) - n_categories = 4 - m = 4 - τ = 1 - s = GaussianSymbolization(n_categories = n_categories) - - # Symbols should be in the set [1, 2, …, n_categories]. - symbols = Entropies.symbolize(x, s) - @test all([s ∈ collect(1:n_categories) for s in symbols]) - - # Dispersion patterns should have a normalized histogram that sums to 1.0. 
-        dispersion_patterns = DelayEmbeddings.embed(symbols, m, τ)
-        hist = Entropies.dispersion_histogram(dispersion_patterns, length(x), m, τ)
-        @test sum(hist) ≈ 1.0
-
-        de = entropy_dispersion(x, s, m = 4, τ = 1)
-        @test typeof(de) <: Real
-        @test de >= 0.0
-    end
-
     @testset "Tsallis" begin
         p = Probabilities(repeat([1/5], 5))
         @assert round(entropy_tsallis(p, q = -1/2, k = 1), digits = 2) ≈ 6.79
+
+        # Analytical tests from Tsallis (1988)
+        # ------------------------------------
+        # Probability distribution has only one element
+        @test entropy_tsallis(Probabilities([1.0])) ≈ 0.0
+
+        # One and only one probability of 1.0 => entropy → 0.
+        @test entropy_tsallis(Probabilities([1.0, 0.0, 0.0, 0.0])) ≈ 0.0
+
+        # The uniform distribution maximizes Tsallis entropy. The maximum value is
+        # k*(N^(1 - q) - 1) / (1 - q), where N is the number of states; this reduces
+        # to log(N) in the limit q → 1.
+        N = 4
+        ps = Probabilities(repeat([1/N], N))
+
+        q_cases = [-2.0, -0.5, 0.5, 2.0]
+        t_entropies = [entropy_tsallis(ps, q = q) for q in q_cases]
+        maxvals = [maxentropy_tsallis(N, q) for q in q_cases]
+        @test all(t_entropies .≈ maxvals)
+
+        q_cases = [-2.0, -0.5, 0.5, 2.0]
+        k = 2
+        t_entropies = [entropy_tsallis(ps, q = q, k = 2) for q in q_cases]
+        maxvals = [maxentropy_tsallis(N, q, k = 2) for q in q_cases]
+        @test all(t_entropies .≈ maxvals)
+
+        # Reduces to Shannon entropy for q → 1.0
+        @test entropy_tsallis(ps, q = 1.0, base = 2) ≈ log(2, N)
     end
 end