refactor whitening for closer integration with StatsBase types (part of JuliaStats#109)
wildart · Mar 11, 2021 · 7cde9c3 · 7cde9c3
1 parent 617b5bb
commit 7cde9c3
Show file tree

Hide file tree

Showing 9 changed files with 183 additions and 21 deletions.
diff --git a/docs/Project.toml b/docs/Project.toml
@@ -0,0 +1,7 @@
+[deps]
+Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411"
+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+
+[compat]
+Documenter = "0.26"
diff --git a/docs/make.jl b/docs/make.jl
@@ -0,0 +1,16 @@
+using Documenter, MultivariateStats, StatsBase, Statistics, Random, LinearAlgebra
+
+if Base.HOME_PROJECT[] !== nothing
+    Base.HOME_PROJECT[] = abspath(Base.HOME_PROJECT[])
+end
+
+makedocs(
+    sitename = "MultivariateStats.jl",
+    modules = [MultivariateStats],
+    pages = ["index.md",
+             "whiten.md"]
+)
+
+deploydocs(
+    repo = "github.com/JuliaStats/MultivariateStats.jl.git"
+)
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -0,0 +1,19 @@
+# MultivariateStats.jl Documentation
+
+```@meta
+CurrentModule = MultivariateStats
+DocTestSetup = quote
+    using Statistics
+    using Random
+end
+```
+
+*MultivariateStats.jl* is a Julia package for multivariate statistical analysis. It provides a rich set of useful analysis techniques, such as PCA, CCA, LDA, ICA, etc.
+
+
+```@contents
+Pages = ["whiten.md"]
+Depth = 2
+```
+
+**Notes:** All methods implemented in this package adopt the column-major convention of JuliaStats: in a data matrix, each column corresponds to a sample/observation, while each row corresponds to a feature (variable or attribute).
diff --git a/docs/src/whiten.md b/docs/src/whiten.md
@@ -0,0 +1,35 @@
+# Data Whitening
+
+A [whitening transformation](http://en.wikipedia.org/wiki/Whitening_transformation) is a decorrelation transformation that transforms a set of random variables into a set of new random variables with identity covariance (uncorrelated with unit variances).
+
+In particular, suppose a random vector has covariance ``\mathbf{C}``, then a whitening transform ``\mathbf{W}`` is one that satisfies:
+
+```math
+   \mathbf{W}^T \mathbf{C} \mathbf{W} = \mathbf{I}
+```
+
+Note that ``\mathbf{W}`` is generally not unique. In particular, if ``\mathbf{W}`` is a whitening transform, so is any of its rotations ``\mathbf{W} \mathbf{R}`` with ``\mathbf{R}^T \mathbf{R} = \mathbf{I}``.
+
+## Whitening
+
+The package uses [`Whitening`](@ref) to represent a whitening transform.
+
+```@docs
+Whitening
+```
+
+A whitening transformation can be fitted to data using the `fit` method.
+
+```@docs
+fit(::Type{Whitening}, X::AbstractMatrix{T}; kwargs...) where {T<:Real}
+transform(::Whitening, ::AbstractVecOrMat)
+indim
+outdim
+mean(::Whitening)
+```
+
+Additional methods
+```@docs
+cov_whitening
+cov_whitening!
+```
diff --git a/src/MultivariateStats.jl b/src/MultivariateStats.jl
@@ -1,16 +1,17 @@
 module MultivariateStats
     using LinearAlgebra
-    using StatsBase: SimpleCovariance, CovarianceEstimator
+    using StatsBase: SimpleCovariance, CovarianceEstimator, RegressionModel,
+                     AbstractDataTransform
     import Statistics: mean, var, cov, covm
     import Base: length, size, show, dump
-    import StatsBase: fit, predict, ConvergenceException
+    import StatsBase: fit, predict, predict!, ConvergenceException, dof_residual, coef
     import SparseArrays
     import LinearAlgebra: eigvals
 
     export
 
     ## common
-    evaluate,           # evaluate discriminant function values (imported from Base)
+    evaluate,           # evaluate discriminant function values
     predict,            # use a model to predict responses (imported from StatsBase)
     fit,                # fit a model to data (imported from StatsBase)
     centralize,         # subtract a mean vector from each column
@@ -19,7 +20,7 @@ module MultivariateStats
     outdim,             # the output dimension of a model
     projection,         # the projection matrix
     reconstruct,        # reconstruct the input (approximately) given the output
-    transform,          # apply a model to transform a vector or a matrix
+    # transform,          # apply a model to transform a vector or a matrix
 
     # lreg
     llsq,               # Linear Least Square regression
@@ -112,8 +113,8 @@ module MultivariateStats
     faem,                   # Maximum likelihood probabilistic PCA
     facm                    # EM algorithm for probabilistic PCA
 
-
     ## source files
+    include("types.jl")
     include("common.jl")
     include("lreg.jl")
     include("whiten.jl")
@@ -126,4 +127,8 @@ module MultivariateStats
     include("ica.jl")
     include("fa.jl")
 
+    @deprecate transform(m,x) predict(m,x) #ex=false
+    @deprecate transform(m) predict(m) #ex=false
+    # const transform = predict
+
 end # module
diff --git a/src/common.jl b/src/common.jl
@@ -20,7 +20,7 @@ decentralize(x::AbstractMatrix, m::AbstractVector) = (isempty(m) ? x : x .+ m)
 
 # get a full mean vector
 
-fullmean(d::Int, mv::Vector{T}) where T = (isempty(mv) ? zeros(T, d) : mv)
+fullmean(d::Int, mv::AbstractVector{T}) where T = (isempty(mv) ? zeros(T, d) : mv)
 
 preprocess_mean(X::AbstractMatrix{T}, m) where T<:Real =
     (m === nothing ? vec(mean(X, dims=2)) : m == 0 ? T[] :  m)

diff --git a/src/types.jl b/src/types.jl
@@ -0,0 +1,14 @@
+
+"""
+    indim(m)
+
+Get the input dimension of the model `m`.
+"""
+function indim(m::RegressionModel) end
+
+"""
+    outdim(m)
+
+Get the out dimension of the model `m`.
+"""
+function outdim(m::RegressionModel) end
diff --git a/src/whiten.jl b/src/whiten.jl
@@ -4,42 +4,99 @@
 #
 # finds W, such that W'CW = I
 #
+"""
+    cov_whitening(C)
+
+Derive the whitening transform coefficient matrix `W` given the covariance matrix `C`. Here, `C` can be either a square matrix, or an instance of `Cholesky`.
+
+Internally, this function solves the whitening transform using Cholesky factorization. The rationale is as follows: let ``\\mathbf{C} = \\mathbf{U}^T \\mathbf{U}`` and ``\\mathbf{W} = \\mathbf{U}^{-1}``, then ``\\mathbf{W}^T \\mathbf{C} \\mathbf{W} = \\mathbf{I}``.
+
+**Note:** The returned matrix `W` is an upper triangular matrix.
+"""
 function cov_whitening(C::Cholesky{T}) where {T<:Real}
     cf = C.UL
     Matrix{T}(inv(istriu(cf) ? cf : cf'))
 end
 
-cov_whitening!(C::DenseMatrix{<:Real}) = cov_whitening(cholesky!(Hermitian(C, :U)))
-cov_whitening(C::DenseMatrix{<:Real}) = cov_whitening!(copy(C))
+"""
+    cov_whitening!(C)
+
+In-place version of `cov_whitening(C)`, in which the input matrix `C` will be overwritten during computation. This can be more efficient when `C` is no longer used.
+"""
+cov_whitening!(C::AbstractMatrix{<:Real}) = cov_whitening(cholesky!(Hermitian(C, :U)))
+cov_whitening(C::AbstractMatrix{<:Real}) = cov_whitening!(copy(C))
+
+"""
+    cov_whitening!(C, regcoef)
 
-cov_whitening!(C::DenseMatrix{<:Real}, regcoef::Real) = cov_whitening!(regularize_symmat!(C, regcoef))
-cov_whitening(C::DenseMatrix{<:Real}, regcoef::Real) = cov_whitening!(copy(C), regcoef)
+In-place version of `cov_whitening(C, regcoef)`, in which the input matrix `C` will be overwritten during computation. This can be more efficient when `C` is no longer used.
+"""
+cov_whitening!(C::AbstractMatrix{<:Real}, regcoef::Real) = cov_whitening!(regularize_symmat!(C, regcoef))
+
+"""
+    cov_whitening(C, regcoef)
+
+Derive a whitening transform based on a regularized covariance, as `C + (eigmax(C) * regcoef) * eye(d)`.
+"""
+cov_whitening(C::AbstractMatrix{<:Real}, regcoef::Real) = cov_whitening!(copy(C), regcoef)
 
 ## Whitening type
 
-struct Whitening{T<:Real}
-    mean::Vector{T}
-    W::Matrix{T}
+"""
+A whitening transform representation.
+"""
+struct Whitening{T<:Real} <: AbstractDataTransform
+    mean::AbstractVector{T}
+    W::AbstractMatrix{T}
 
-    function Whitening{T}(mean::Vector{T}, W::Matrix{T}) where {T<:Real}
+    function Whitening{T}(mean::AbstractVector{T}, W::AbstractMatrix{T}) where {T<:Real}
         d, d2 = size(W)
         d == d2 || error("W must be a square matrix.")
         isempty(mean) || length(mean) == d ||
         throw(DimensionMismatch("Sizes of mean and W are inconsistent."))
         return new(mean, W)
     end
 end
-Whitening(mean::Vector{T}, W::Matrix{T}) where {T<:Real} = Whitening{T}(mean, W)
+Whitening(mean::AbstractVector{T}, W::AbstractMatrix{T}) where {T<:Real} = Whitening{T}(mean, W)
 
 indim(f::Whitening) = size(f.W, 1)
 outdim(f::Whitening) = size(f.W, 2)
+
+"""
+    mean(m)
+
+Get the mean vector of whitening transformation `m`.
+
+**Note:** if mean is empty, this function returns a zero vector of length [`indim`](@ref).
+"""
 mean(f::Whitening) = fullmean(indim(f), f.mean)
 
+
+"""
+    transform(f, x)
+
+Apply the whitening transform `f` to a vector or a matrix `x` with samples in columns, as ``\\mathbf{W}^T (\\mathbf{x} - \\boldsymbol{\\mu})``.
+"""
 transform(f::Whitening, x::AbstractVecOrMat) = transpose(f.W) * centralize(x, f.mean)
 
-## Fit whitening to data
+"""
+    fit(::Type{Whitening},  X::AbstractMatrix{T}; kwargs...)
+
+Estimate a whitening transform from the data given in `X`. Here, `X` should be a matrix, whose columns give the samples.
 
-function fit(::Type{Whitening}, X::DenseMatrix{T};
+This function returns an instance of [`Whitening`](@ref)
+
+**Keyword Arguments:**
+- `regcoef`: The regularization coefficient. The covariance will be regularized as follows when `regcoef` is positive: `C + (eigmax(C) * regcoef) * eye(d)`. Default value is `zero(T)`.
+
+- `mean`: The mean vector, which can be either of:
+    - `0`: the input data has already been centralized
+    - `nothing`: this function will compute the mean (**default**)
+    - a pre-computed mean vector
+
+**Note:** This function internally relies on [`cov_whitening`](@ref) to derive the transformation `W`.
+"""
+function fit(::Type{Whitening}, X::AbstractMatrix{T};
              mean=nothing, regcoef::Real=zero(T)) where {T<:Real}
     n = size(X, 2)
     n > 1 || error("X must contain more than one sample.")
@@ -51,7 +108,7 @@ end
 
 # invsqrtm
 
-function _invsqrtm!(C::Matrix{<:Real})
+function _invsqrtm!(C::AbstractMatrix{<:Real})
     n = size(C, 1)
     size(C, 2) == n || error("C must be a square matrix.")
     E = eigen!(Symmetric(C))
@@ -64,4 +121,9 @@ function _invsqrtm!(C::Matrix{<:Real})
     return U * transpose(U)
 end
 
-invsqrtm(C::DenseMatrix{<:Real}) = _invsqrtm!(copy(C))
+"""
+    invsqrtm(C)
+
+Compute `inv(sqrt(C))` through symmetric eigenvalue decomposition.
+"""
+invsqrtm(C::AbstractMatrix{<:Real}) = _invsqrtm!(copy(C))
diff --git a/test/whiten.jl b/test/whiten.jl
@@ -1,5 +1,5 @@
 using MultivariateStats
-using LinearAlgebra
+using LinearAlgebra, StatsBase, SparseArrays
 using Test
 import Statistics: mean, cov
 import Random
@@ -57,7 +57,7 @@ import Random
     @test mean(f) === f.mean
     @test istriu(W)
     @test W'C * W ≈ Matrix(I, d, d)
-    @test transform(f, X) ≈ W' * (X .- f.mean)
+    @test  MultivariateStats.transform(f, X) ≈ W' * (X .- f.mean)
 
     f = fit(Whitening, X; regcoef=rc)
     W = f.W
@@ -78,4 +78,8 @@ import Random
     @test C == C0
     @test R ≈ inv(sqrt(C))
 
+    # sparse arrays
+    X = sprand(Float32, d, n, 0.5)
+    f = fit(Whitening, X; mean=sprand(Float32, 3, 0.5))
+    @test MultivariateStats.transform(f, X) isa DenseMatrix
 end