From c8564b847d6cc49bfa675b439d90d24810841d4d Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sun, 11 Aug 2024 15:46:48 +0200 Subject: [PATCH] Rework presentation of keywords (#393) * Unify all doc strings to one format. --- Changelog.md | 12 + Project.toml | 2 +- docs/src/about.md | 18 +- docs/src/notation.md | 2 + docs/src/plans/debug.md | 4 +- docs/src/plans/index.md | 2 +- docs/src/plans/record.md | 4 +- docs/src/plans/stepsize.md | 2 +- docs/src/solvers/DouglasRachford.md | 20 +- .../adaptive-regularization-with-cubics.md | 2 +- docs/src/solvers/conjugate_residual.md | 10 +- docs/src/solvers/interior_point_Newton.md | 2 +- .../config/vocabularies/Manopt/accept.txt | 10 + ext/ManoptLRUCacheExt.jl | 14 +- ext/ManoptLineSearchesExt.jl | 2 +- ext/ManoptManifoldsExt/ManoptManifoldsExt.jl | 6 + .../alternating_gradient.jl | 10 +- ext/ManoptManifoldsExt/manifold_functions.jl | 21 +- src/helpers/LineSearchesTypes.jl | 22 +- src/helpers/checks.jl | 162 +++++--- src/helpers/exports/Asymptote.jl | 71 ++-- src/plans/Douglas_Rachford_plan.jl | 16 +- src/plans/alternating_gradient_plan.jl | 7 +- src/plans/augmented_lagrangian_plan.jl | 26 +- src/plans/cache.jl | 58 +-- src/plans/conjugate_gradient_plan.jl | 31 +- src/plans/conjugate_residual_plan.jl | 114 ++++-- src/plans/constrained_plan.jl | 2 +- src/plans/count.jl | 18 +- src/plans/debug.jl | 185 ++++----- src/plans/difference_of_convex_plan.jl | 2 +- src/plans/docstring_snippets.jl | 167 +++++++++ src/plans/embedded_objective.jl | 20 +- src/plans/gradient_plan.jl | 54 +-- src/plans/hessian_plan.jl | 60 +-- src/plans/higher_order_primal_dual_plan.jl | 60 +-- src/plans/interior_point_Newton_plan.jl | 18 +- src/plans/nonlinear_least_squares_plan.jl | 52 +-- src/plans/plan.jl | 1 + src/plans/primal_dual_plan.jl | 57 +-- src/plans/proximal_plan.jl | 50 ++- src/plans/quasi_newton_plan.jl | 62 +-- src/plans/record.jl | 86 ++--- src/plans/solver_state.jl | 10 +- src/plans/stepsize.jl | 354 ++++++++++-------- 
src/plans/stochastic_gradient_plan.jl | 6 +- src/plans/stopping_criterion.jl | 122 +++--- src/solvers/ChambollePock.jl | 199 +++++----- src/solvers/DouglasRachford.jl | 161 ++++---- src/solvers/FrankWolfe.jl | 184 +++++---- src/solvers/Lanczos.jl | 122 +++--- src/solvers/LevenbergMarquardt.jl | 68 ++-- src/solvers/NelderMead.jl | 138 ++++--- .../adaptive_regularization_with_cubics.jl | 219 ++++++----- src/solvers/alternating_gradient_descent.jl | 4 +- src/solvers/augmented_Lagrangian_method.jl | 257 ++++++++----- src/solvers/cma_es.jl | 67 ++-- src/solvers/conjugate_gradient_descent.jl | 104 +++-- src/solvers/conjugate_residual.jl | 30 +- src/solvers/convex_bundle_method.jl | 222 ++++++----- src/solvers/cyclic_proximal_point.jl | 60 ++- src/solvers/debug_solver.jl | 26 +- .../difference-of-convex-proximal-point.jl | 179 ++++----- src/solvers/difference_of_convex_algorithm.jl | 123 +++--- src/solvers/exact_penalty_method.jl | 216 ++++++----- src/solvers/gradient_descent.jl | 145 +++---- src/solvers/interior_point_Newton.jl | 53 ++- src/solvers/particle_swarm.jl | 171 +++++---- src/solvers/primal_dual_semismooth_Newton.jl | 60 +-- src/solvers/proximal_bundle_method.jl | 190 +++++----- src/solvers/quasi_Newton.jl | 203 +++++----- src/solvers/record_solver.jl | 16 +- src/solvers/solver.jl | 30 +- src/solvers/stochastic_gradient_descent.jl | 98 ++--- src/solvers/subgradient.jl | 90 ++--- .../truncated_conjugate_gradient_descent.jl | 218 ++++++----- src/solvers/trust_regions.jl | 178 +++++---- test/plans/test_conjugate_gradient_plan.jl | 2 +- test/plans/test_conjugate_residual_plan.jl | 4 +- test/plans/test_stopping_criteria.jl | 4 +- test/solvers/test_ChambollePock.jl | 2 +- ...est_adaptive_regularization_with_cubics.jl | 3 +- tutorials/HowToDebug.qmd | 4 +- tutorials/ImplementASolver.qmd | 2 +- 84 files changed, 3243 insertions(+), 2645 deletions(-) create mode 100644 src/plans/docstring_snippets.jl diff --git a/Changelog.md b/Changelog.md index 9ecb1b6cc0..e6c1eb8e1a 
100644 --- a/Changelog.md +++ b/Changelog.md @@ -5,6 +5,17 @@ All notable Changes to the Julia package `Manopt.jl` will be documented in this The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.4.70] WIP + +### Added + +* Unify doc strings and presentation of keyword arguments + * general indexing, for example in a vector, uses `i` + * index for inequality constraints is unified to `i` running from `1,...,m` + * index for equality constraints is unified to `j` running from `1,...,n` + * iterations are using now `k` +* Doc strings unified and even reusing similar docstring snippets. + ## [0.4.69] – August 3, 2024 ### Changed @@ -40,6 +51,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * a few typos in the documentation * `WolfePowellLinesearch` no longer uses `max_stepsize` with invalid point by default. + ## [0.4.66] June 27, 2024 ### Changed diff --git a/Project.toml b/Project.toml index c429950d33..39367819b4 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Manopt" uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5" authors = ["Ronny Bergmann "] -version = "0.4.69" +version = "0.4.70" [deps] ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4" diff --git a/docs/src/about.md b/docs/src/about.md index a1a3c1eccb..554307fc8f 100644 --- a/docs/src/about.md +++ b/docs/src/about.md @@ -3,7 +3,10 @@ Manopt.jl inherited its name from [Manopt](https://manopt.org), a Matlab toolbox for optimization on manifolds. This Julia package was started and is currently maintained by [Ronny Bergmann](https://ronnybergmann.net/). 
-The following people contributed +## Contributors + +Thanks to the following contributors to `Manopt.jl`: + * [Constantin Ahlmann-Eltze](https://const-ae.name) implemented the [gradient and differential `check` functions](helpers/checks.md) * [Renée Dornig](https://github.com/r-dornig) implemented the [particle swarm](solvers/particle_swarm.md), the [Riemannian Augmented Lagrangian Method](solvers/augmented_Lagrangian_method.md), the [Exact Penalty Method](solvers/exact_penalty_method.md), as well as the [`NonmonotoneLinesearch`](@ref) * [Willem Diepeveen](https://www.maths.cam.ac.uk/person/wd292) implemented the [primal-dual Riemannian semismooth Newton](solvers/primal_dual_semismooth_Newton.md) solver. @@ -14,21 +17,28 @@ The following people contributed * [Markus A. Stokkenes](https://www.linkedin.com/in/markus-a-stokkenes-b41bba17b/) contributed most of the implementation of the [Interior Point Newton Method](solvers/interior_point_Newton.md) * [Manuel Weiss](https://scoop.iwr.uni-heidelberg.de/author/manuel-weiß/) implemented most of the [conjugate gradient update rules](@ref cg-coeffs) -as well as various [contributors](https://github.com/JuliaManifolds/Manopt.jl/graphs/contributors) providing small extensions, finding small bugs and mistakes and fixing them by opening [PR](https://github.com/JuliaManifolds/Manopt.jl/pulls)s. +as well as various [contributors](https://github.com/JuliaManifolds/Manopt.jl/graphs/contributors) providing small extensions, finding small bugs and mistakes and fixing them by opening [PR](https://github.com/JuliaManifolds/Manopt.jl/pulls)s. Thanks to all of you. If you want to contribute a manifold or algorithm or have any questions, visit the [GitHub repository](https://github.com/JuliaManifolds/Manopt.jl/) to clone/fork the repository or open an issue. 
+## Work using Manopt.jl + +* [ExponentialFamilyProjection.jl](https://github.com/ReactiveBayes/ExponentialFamilyProjection.jl) projects distributions +* [Caesar.jl](https://github.com/JuliaRobotics/Caesar.jl) within non-Gaussian factor graph inference algorithms + +Is a package missing? [Open an issue](https://github.com/JuliaManifolds/Manopt.jl/issues/new)! +It would be great to collect anything and anyone using Manopt.jl -# Further packages +## Further packages `Manopt.jl` belongs to the Manopt family: * [manopt.org](https://www.manopt.org) The Matlab version of Manopt, see also their :octocat: [GitHub repository](https://github.com/NicolasBoumal/manopt) * [pymanopt.org](https://www.pymanopt.org/) The Python version of Manopt providing also several AD backends, see also their :octocat: [GitHub repository](https://github.com/pymanopt/pymanopt) -but there are also more packages providing tools on manifolds: +but there are also more packages providing tools on manifolds in other languages * [Jax Geometry](https://github.com/ComputationalEvolutionaryMorphometry/jaxgeometry) (Python/Jax) for differential geometry and stochastic dynamics with deep learning * [Geomstats](https://geomstats.github.io) (Python with several backends) focusing on statistics and machine learning :octocat: [GitHub repository](https://github.com/geomstats/geomstats) diff --git a/docs/src/notation.md b/docs/src/notation.md index f95e5d2d81..0981000eb0 100644 --- a/docs/src/notation.md +++ b/docs/src/notation.md @@ -5,4 +5,6 @@ with the following additional parts. 
| Symbol | Description | Also used | Comment | |:--:|:--------------- |:--:|:-- | +| ``\operatorname{arg\,min}`` | argument of a function ``f`` where a local or global minimum is attained | | +| ``k`` | the current iterate | ``i`` | the goal is to unify this to `k` | | ``∇`` | The [Levi-Cevita connection](https://en.wikipedia.org/wiki/Levi-Civita_connection) | | | diff --git a/docs/src/plans/debug.md b/docs/src/plans/debug.md index 1689a18166..5b2ce956bf 100644 --- a/docs/src/plans/debug.md +++ b/docs/src/plans/debug.md @@ -24,6 +24,6 @@ automatically available, as explained in the [`gradient_descent`](@ref) solver. ```@docs initialize_solver!(amp::AbstractManoptProblem, dss::DebugSolverState) -step_solver!(amp::AbstractManoptProblem, dss::DebugSolverState, i) -stop_solver!(amp::AbstractManoptProblem, dss::DebugSolverState, i::Int) +step_solver!(amp::AbstractManoptProblem, dss::DebugSolverState, k) +stop_solver!(amp::AbstractManoptProblem, dss::DebugSolverState, k::Int) ``` diff --git a/docs/src/plans/index.md b/docs/src/plans/index.md index 0fb9fa90e0..5097c770ef 100644 --- a/docs/src/plans/index.md +++ b/docs/src/plans/index.md @@ -26,7 +26,7 @@ The following symbols are used.
| Symbol | Used in | Description | | :----------- | :------ | :--------------------------------------------------------- | | `:Activity` | [`DebugWhenActive`](@ref) | activity of the debug action stored within | -| `:Basepoint` | [`TangentSpace`]() | the point the tangent space is at | +| `:Basepoint` | [`TangentSpace`](@extref ManifoldsBase `ManifoldsBase.TangentSpace`) | the point the tangent space is at | | `:Cost` | generic |the cost function (within an objective, as pass down) | | `:Debug` | [`DebugSolverState`](@ref) | the stored `debugDictionary` | | `:Gradient` | generic | the gradient function (within an objective, as pass down) | diff --git a/docs/src/plans/record.md b/docs/src/plans/record.md index 8f716ca4d2..080b0d7b38 100644 --- a/docs/src/plans/record.md +++ b/docs/src/plans/record.md @@ -41,6 +41,6 @@ Further specific [`RecordAction`](@ref)s can be found when specific types of [`A ```@docs initialize_solver!(amp::AbstractManoptProblem, rss::RecordSolverState) -step_solver!(p::AbstractManoptProblem, s::RecordSolverState, i) -stop_solver!(p::AbstractManoptProblem, s::RecordSolverState, i) +step_solver!(p::AbstractManoptProblem, s::RecordSolverState, k) +stop_solver!(p::AbstractManoptProblem, s::RecordSolverState, k) ``` \ No newline at end of file diff --git a/docs/src/plans/stepsize.md b/docs/src/plans/stepsize.md index 27ee9ebc35..b2862f2f88 100644 --- a/docs/src/plans/stepsize.md +++ b/docs/src/plans/stepsize.md @@ -33,7 +33,7 @@ Tangent bundle with the Sasaki metric has 0 injectivity radius, so the maximum s `Hyperrectangle` also has 0 injectivity radius and an estimate based on maximum of dimensions along each index is used instead. For manifolds with corners, however, a line search capable of handling break points along the projected search direction should be used, and such algorithms do not call `max_stepsize`. -Some solvers have a different iterate from the one used for linesearch. 
Then the following state can be used to wrap +Some solvers have a different iterate from the one used for the line search. Then the following state can be used to wrap these locally ```@docs diff --git a/docs/src/solvers/DouglasRachford.md b/docs/src/solvers/DouglasRachford.md index 858fed5448..f7a04ccff0 100644 --- a/docs/src/solvers/DouglasRachford.md +++ b/docs/src/solvers/DouglasRachford.md @@ -6,44 +6,44 @@ manifolds in [BergmannPerschSteidl:2016](@cite). The aim is to minimize the sum ```math -F(p) = f(p) + g(p) +f(p) = g(p) + h(p) ``` on a manifold, where the two summands have proximal maps -``\operatorname{prox}_{λ f}, \operatorname{prox}_{λ g}`` that are easy +``\operatorname{prox}_{λ g}, \operatorname{prox}_{λ h}`` that are easy to evaluate (maybe in closed form, or not too costly to approximate). Further, define the reflection operator at the proximal map as ```math -\operatorname{refl}_{λ f}(p) = \operatorname{retr}_{\operatorname{prox}_{λ f}(p)} \bigl( -\operatorname{retr}^{-1}_{\operatorname{prox}_{λ f}(p)} p \bigr). +\operatorname{refl}_{λ g}(p) = \operatorname{retr}_{\operatorname{prox}_{λ g}(p)} \bigl( -\operatorname{retr}^{-1}_{\operatorname{prox}_{λ g}(p)} p \bigr). ``` Let ``\alpha_k ∈ [0,1]`` with ``\sum_{k ∈ ℕ} \alpha_k(1-\alpha_k) = \infty`` and ``λ > 0`` (which might depend on iteration ``k`` as well) be given. -Then the (P)DRA algorithm for initial data ``x_0 ∈ \mathcal H`` as +Then the (P)DRA algorithm for initial data ``p^{(0)} ∈ \mathcal M`` as ## Initialization -Initialize ``t_0 = x_0`` and ``k=0`` +Initialize ``q^{(0)} = p^{(0)}`` and ``k=0`` ## Iteration Repeat until a convergence criterion is reached -1. Compute ``s_k = \operatorname{refl}_{λ f}\operatorname{refl}_{λ g}(t_k)`` -2. Within that operation, store ``p_{k+1} = \operatorname{prox}_{λ g}(t_k)`` which is the prox the inner reflection reflects at. -3. 
Compute ``t_{k+1} = g(\alpha_k; t_k, s_k)``, where ``g`` is a curve approximating the shortest geodesic, provided by a retraction and its inverse +1. Compute ``r^{(k)} = \operatorname{refl}_{λ g}\operatorname{refl}_{λ h}(q^{(k)})`` +2. Within that operation, store ``p^{(k+1)} = \operatorname{prox}_{λ h}(q^{(k)})`` which is the prox the inner reflection reflects at. +3. Compute ``q^{(k+1)} = g(\alpha_k; q^{(k)}, r^{(k)})``, where ``g`` is a curve approximating the shortest geodesic, provided by a retraction and its inverse 4. Set ``k = k+1`` ## Result -The result is given by the last computed ``p_K``. +The result is given by the last computed ``p^{(K)}`` at the last iterate ``K``. For the parallel version, the first proximal map is a vectorial version where in each component one prox is applied to the corresponding copy of ``t_k`` and the second proximal map corresponds to the indicator function of the set, -where all copies are equal (in ``\mathcal H^n``, where ``n`` is the number of copies), +where all copies are equal (in ``\mathcal M^n``, where ``n`` is the number of copies), leading to the second prox being the Riemannian mean. ## Interface diff --git a/docs/src/solvers/adaptive-regularization-with-cubics.md b/docs/src/solvers/adaptive-regularization-with-cubics.md index e866b7da26..4b6ff3ebc1 100644 --- a/docs/src/solvers/adaptive-regularization-with-cubics.md +++ b/docs/src/solvers/adaptive-regularization-with-cubics.md @@ -64,7 +64,7 @@ of a manifolds to be available * By default the tangent vector storing the gradient is initialized calling [`zero_vector`](@extref `ManifoldsBase.zero_vector-Tuple{AbstractManifold, Any}`)`(M,p)`. 
* [`inner`](@extref `ManifoldsBase.inner-Tuple{AbstractManifold, Any, Any, Any}`)`(M, p, X, Y)` is used within the algorithm step -Furthermore, within the Lanczos subsolver, generating a random vector (at `p`) using [`rand!`](@extref Base.rand-Tuple{AbstractManifold})(M, X; vector_at=p)` in place of `X` is required +Furthermore, within the Lanczos subsolver, generating a random vector (at `p`) using [`rand!`](@extref Base.rand-Tuple{AbstractManifold})`(M, X; vector_at=p)` in place of `X` is required ## Literature diff --git a/docs/src/solvers/conjugate_residual.md b/docs/src/solvers/conjugate_residual.md index d20efad989..0ed7f404b7 100644 --- a/docs/src/solvers/conjugate_residual.md +++ b/docs/src/solvers/conjugate_residual.md @@ -1,4 +1,4 @@ -# Conjugate Residual Solver in a Tangent space +# Conjugate residual solver in a Tangent space ```@meta CurrentModule = Manopt @@ -14,7 +14,7 @@ conjugate_residual ConjugateResidualState ``` -## Objetive +## Objective ```@docs SymmetricLinearSystemObjective @@ -26,6 +26,12 @@ SymmetricLinearSystemObjective StopWhenRelativeResidualLess ``` +## Internal functions + +```@docs +Manopt.get_b +``` + ## Literature ```@bibliography diff --git a/docs/src/solvers/interior_point_Newton.md b/docs/src/solvers/interior_point_Newton.md index 02bc3520ce..773d2e2cd2 100644 --- a/docs/src/solvers/interior_point_Newton.md +++ b/docs/src/solvers/interior_point_Newton.md @@ -1,4 +1,4 @@ -# Interior Point Newton method +# Interior point Newton method ```@meta CurrentModule = Manopt diff --git a/docs/styles/config/vocabularies/Manopt/accept.txt b/docs/styles/config/vocabularies/Manopt/accept.txt index 66fdfd3115..75246fb123 100644 --- a/docs/styles/config/vocabularies/Manopt/accept.txt +++ b/docs/styles/config/vocabularies/Manopt/accept.txt @@ -1,3 +1,10 @@ +_field_.*\b +_arg_.*\b +_kw_.*\b +_l_.*\b +_math_.*\b +_problem_.*\b +_doc_.*\b Absil Adagrad [A|a]djoint @@ -62,6 +69,7 @@ Lui Manifolds.jl ManifoldsBase.jl [Mm]anopt(:?.org|.jl)? 
+Markus Marquardt Moakher Munkvold @@ -90,6 +98,7 @@ Riemer Riemopt Riesz Rosenbrock +Sasaki semicontinuous Steihaug Stiefel @@ -98,6 +107,7 @@ Souza Steidl Stephansen [Ss]tepsize +Stokkenes [Ss]ubdifferential [Ss]ubgradient subsampled diff --git a/ext/ManoptLRUCacheExt.jl b/ext/ManoptLRUCacheExt.jl index 6273a7aa44..6e2274f3cb 100644 --- a/ext/ManoptLRUCacheExt.jl +++ b/ext/ManoptLRUCacheExt.jl @@ -27,11 +27,15 @@ Given a vector of symbols `caches`, this function sets up the # Keyword arguments -* `p`: (`rand(M)`) a point on a manifold, to both infer its type for keys and initialize caches -* `value`: (`0.0`) a value both typing and initialising number-caches, the default is for (Float) values like the cost. -* `X`: (`zero_vector(M, p)` a tangent vector at `p` to both type and initialize tangent vector caches -* `cache_size`: (`10`) a default cache size to use -* `cache_sizes`: (`Dict{Symbol,Int}()`) a dictionary of sizes for the `caches` to specify different (non-default) sizes +* `p=`$(Manopt._link_rand()): a point on a manifold, to both infer its type for keys and initialize caches +* `value=0.0`: + a value both typing and initialising number-caches, the default is for (Float) values like the cost. 
+* `X=zero_vector(M, p)`: + a tangent vector at `p` to both type and initialize tangent vector caches +* `cache_size=10`: + a default cache size to use +* `cache_sizes=Dict{Symbol,Int}()`: + a dictionary of sizes for the `caches` to specify different (non-default) sizes """ function Manopt.init_caches( M::AbstractManifold, diff --git a/ext/ManoptLineSearchesExt.jl b/ext/ManoptLineSearchesExt.jl index 37a5c2dfaa..61efbb8f22 100644 --- a/ext/ManoptLineSearchesExt.jl +++ b/ext/ManoptLineSearchesExt.jl @@ -15,7 +15,7 @@ end function (cs::Manopt.LineSearchesStepsize)( mp::AbstractManoptProblem, s::AbstractManoptSolverState, - i::Int, + k::Int, η=-get_gradient(s); fp=get_cost(mp, get_iterate(s)), kwargs..., diff --git a/ext/ManoptManifoldsExt/ManoptManifoldsExt.jl b/ext/ManoptManifoldsExt/ManoptManifoldsExt.jl index 679b7d6a7b..fc62c4ea4e 100644 --- a/ext/ManoptManifoldsExt/ManoptManifoldsExt.jl +++ b/ext/ManoptManifoldsExt/ManoptManifoldsExt.jl @@ -2,6 +2,12 @@ module ManoptManifoldsExt using ManifoldsBase: exp, log, ParallelTransport, vector_transport_to using Manopt +using Manopt: + _l_refl, + _l_retr, + _kw_retraction_method_default, + _kw_inverse_retraction_method_default, + _kw_X_default import Manopt: max_stepsize, alternating_gradient_descent, diff --git a/ext/ManoptManifoldsExt/alternating_gradient.jl b/ext/ManoptManifoldsExt/alternating_gradient.jl index b6780f74f8..252f916419 100644 --- a/ext/ManoptManifoldsExt/alternating_gradient.jl +++ b/ext/ManoptManifoldsExt/alternating_gradient.jl @@ -16,18 +16,18 @@ function get_gradient( end @doc raw""" - X = get_gradient(M::AbstractManifold, p::ManifoldAlternatingGradientObjective, p, k) - get_gradient!(M::AbstractManifold, p::ManifoldAlternatingGradientObjective, X, p, k) + X = get_gradient(M::AbstractManifold, p::ManifoldAlternatingGradientObjective, p, i) + get_gradient!(M::AbstractManifold, p::ManifoldAlternatingGradientObjective, X, p, i) -Evaluate one of the component gradients ``\operatorname{grad}f_k``, 
``k∈\{1,…,n\}``, at `x` (in place of `Y`). +Evaluate one of the component gradients ``\operatorname{grad}f_i``, ``i∈\{1,…,n\}``, at `x` (in place of `Y`). """ function get_gradient( M::ProductManifold, mago::ManifoldAlternatingGradientObjective{AllocatingEvaluation,TC,<:Function}, p, - k, + i, ) where {TC} - return get_gradient(M, mago, p)[M, k] + return get_gradient(M, mago, p)[M, i] end function get_gradient!( M::AbstractManifold, diff --git a/ext/ManoptManifoldsExt/manifold_functions.jl b/ext/ManoptManifoldsExt/manifold_functions.jl index 494dbb3381..f921aad698 100644 --- a/ext/ManoptManifoldsExt/manifold_functions.jl +++ b/ext/ManoptManifoldsExt/manifold_functions.jl @@ -108,28 +108,31 @@ function reflect!(M::AbstractManifold, q, pr::Function, x; kwargs...) return reflect!(M, q, pr(x), x; kwargs...) end -@doc raw""" +@doc """ reflect(M, p, x, kwargs...) reflect!(M, q, p, x, kwargs...) Reflect the point `x` from the manifold `M` at point `p`, given by -````math - \operatorname{refl}_p(x) = \operatorname{retr}_p(-\operatorname{retr}^{-1}_p x). -```` +```math +$_l_refl +``` -where ``\operatorname{retr}`` and ``\operatorname{retr}^{-1}`` denote a retraction and an inverse +where ``$_l_retr`` and ``$_l_retr^{-1}`` denote a retraction and an inverse retraction, respectively. This can also be done in place of `q`. ## Keyword arguments -* `retraction_method`: (`default_retraction_metiod(M, typeof(p))`) the retraction to use in the reflection -* `inverse_retraction_method`: (`default_inverse_retraction_method(M, typeof(p))`) the inverse retraction to use within the reflection +* $_kw_retraction_method_default + the retraction to use in the reflection +* $_kw_inverse_retraction_method_default + the inverse retraction to use within the reflection and for the `reflect!` additionally -* `X`: (`zero_vector(M,p)`) a temporary memory to compute the inverse retraction in place. +* $_kw_X_default + a temporary memory to compute the inverse retraction in place. 
otherwise this is the memory that would be allocated anyways. """ function reflect( @@ -149,7 +152,7 @@ function reflect!( q, p, x; - retraction_method=default_retraction_method(M), + retraction_method=default_retraction_method(M, typeof(p)), inverse_retraction_method=default_inverse_retraction_method(M), X=zero_vector(M, p), ) diff --git a/src/helpers/LineSearchesTypes.jl b/src/helpers/LineSearchesTypes.jl index 1ed2ef3e1e..bf803f245c 100644 --- a/src/helpers/LineSearchesTypes.jl +++ b/src/helpers/LineSearchesTypes.jl @@ -6,24 +6,24 @@ Wrapper for line searches available in the `LineSearches.jl` library. ## Constructors + LineSearchesStepsize(M::AbstractManifold, linesearch; kwargs... LineSearchesStepsize( - M::AbstractManifold, linesearch; - retraction_method::AbstractRetractionMethod=default_retraction_method(M), - vector_transport_method::AbstractVectorTransportMethod=default_vector_transport_method(M), - ) - LineSearchesStepsize( - linesearch; - retraction_method::AbstractRetractionMethod=ExponentialRetraction(), - vector_transport_method::AbstractVectorTransportMethod=ParallelTransport(), + retraction_method=ExponentialRetraction(), + vector_transport_method=ParallelTransport(), ) Wrap `linesearch` (for example [`HagerZhang`](https://julianlsolvers.github.io/LineSearches.jl/latest/reference/linesearch.html#LineSearches.HagerZhang) or [`MoreThuente`](https://julianlsolvers.github.io/LineSearches.jl/latest/reference/linesearch.html#LineSearches.MoreThuente)). The initial step selection from Linesearches.jl is not yet supported and the value 1.0 is used. -The retraction used for determining the line along which the search is performed can be - provided as `retraction_method`. Gradient vectors are transported between points using -`vector_transport_method`. 
+ +# Keyword Arguments + +* $_kw_retraction_method_default: + $_kw_retraction_method +* $_kw_vector_transport_method_default: + $_kw_vector_transport_method + """ struct LineSearchesStepsize{ TLS,TRM<:AbstractRetractionMethod,TVTM<:AbstractVectorTransportMethod diff --git a/src/helpers/checks.jl b/src/helpers/checks.jl index d4e6d337a5..b2309d5950 100644 --- a/src/helpers/checks.jl +++ b/src/helpers/checks.jl @@ -1,5 +1,5 @@ -@doc raw""" +@doc """ check_differential(M, F, dF, p=rand(M), X=rand(M; vector_at=p); kwargs...) Check numerically whether the differential `dF(M,p,X)` of `F(M,p)` is correct. @@ -11,19 +11,22 @@ no plot is generated, # Keyword arguments -* `exactness_tol`: (`1e-12`) if all errors are below this tolerance, the differential is considered to be exact -* `io`: (`nothing`) provide an `IO` to print the result to -* `limits`: (`(1e-8,1)`) specify the limits in the `log_range` -* `log_range`: (`range(limits[1], limits[2]; length=N)`) specify the range of points (in log scale) to sample the differential line -* `N`: (`101`) number of points to verify within the `log_range` default range ``[10^{-8},10^{0}]`` -* `name`: (`"differential"`) name to display in the plot -* `plot`: (`false`) whether to plot the result (if `Plots.jl` is loaded). +* `exactness_tol=1e-12`: if all errors are below this tolerance, + the differential is considered to be exact +* `io=nothing`: provide an `IO` to print the result to +* `limits=(1e-8,1)`: specify the limits in the `log_range` +* `log_range=range(limits[1], limits[2]; length=N)`: specify the range of points + (in log scale) to sample the differential line +* `N=101`: number of points to verify within the `log_range` default range ``[10^{-8},10^{0}]`` +* `name="differential"`: name to display in the plot +* `plot=false`: whether to plot the result (if `Plots.jl` is loaded). The plot is in log-log-scale. This is returned and can then also be saved. 
-* `retraction_method`: (`default_retraction_method(M, typeof(p))`) retraction method to use -* `slope_tol`: (`0.1`) tolerance for the slope (global) of the approximation -* `throw_error`: (`false`) throw an error message if the differential is wrong -* `window`: (`nothing`) specify window sizes within the `log_range` that are used for the slope estimation. - the default is, to use all window sizes `2:N`. +* $_kw_retraction_method_default: + $_kw_retraction_method +* `slope_tol=0.1`: tolerance for the slope (global) of the approximation +* `throw_error=false`: throw an error message if the differential is wrong +* `window=nothing`: specify window sizes within the `log_range` that are used for + the slope estimation. The default is, to use all window sizes `2:N`. """ function check_differential( M::AbstractManifold, @@ -66,18 +69,20 @@ function check_differential( ) end -@doc raw""" - check_gradient(M, F, gradF, p=rand(M), X=rand(M; vector_at=p); kwargs...) - -Verify numerically whether the gradient `gradF(M,p)` of `F(M,p)` is correct, that is whether - - +_doc_check_gradient_formula = raw""" ```math f(\operatorname{retr}_p(tX)) = f(p) + t⟨\operatorname{grad} f(p), X⟩ + \mathcal O(t^2) ``` +""" +@doc """ + check_gradient(M, f, grad_f, p=rand(M), X=rand(M; vector_at=p); kwargs...) + +Verify numerically whether the gradient `grad_f(M,p)` of `f(M,p)` is correct, that is whether + +$_doc_check_gradient_formula or in other words, that the error between the function ``f`` and its first order Taylor -behaves in error ``\mathcal O(t^2)``, which indicates that the gradient is correct, +behaves in error ``$_l_cO (t^2)``, which indicates that the gradient is correct, cf. also [Boumal:2023; Section 4.8](@cite). Note that if the errors are below the given tolerance and the method is exact, @@ -85,20 +90,34 @@ no plot is generated. # Keyword arguments -* `check_vector`: (`true`) verify that ``\operatorname{grad} f(p) ∈ T_p\mathcal M`` using `is_vector`. 
-* `exactness_tol`: (`1e-12`) if all errors are below this tolerance, the gradient is considered to be exact -* `io`: (`nothing`) provide an `IO` to print the result to -* `gradient`: (`grad_f(M, p)`) instead of the gradient function you can also provide the gradient at `p` directly -* `limits`: (`(1e-8,1)`) specify the limits in the `log_range` -* `log_range`: (`range(limits[1], limits[2]; length=N)`) - specify the range of points (in log scale) to sample the gradient line -* `N`: (`101`) number of points to verify within the `log_range` default range ``[10^{-8},10^{0}]`` -* `plot`: (`false`) whether to plot the result (if `Plots.jl` is loaded). +* `check_vector=true`: + verify that ``$_l_grad f(p) ∈ $(_l_TpM())`` using `is_vector`. +* `exactness_tol=1e-12`: + if all errors are below this tolerance, the gradient is considered to be exact +* `io=nothing`: + provide an `IO` to print the result to +* `gradient=grad_f(M, p)`: + instead of the gradient function you can also provide the gradient at `p` directly +* `limits=(1e-8,1)`: + specify the limits in the `log_range` +* `log_range=range(limits[1], limits[2]; length=N)`: + - specify the range of points (in log scale) to sample the gradient line +* `N=101`: + number of points to verify within the `log_range` default range ``[10^{-8},10^{0}]`` +* `plot=false`: + whether to plot the result (if `Plots.jl` is loaded). The plot is in log-log-scale. This is returned and can then also be saved. -* `retraction_method`: (`default_retraction_method(M, typeof(p))`) retraction method to use -* `slope_tol`: (`0.1`) tolerance for the slope (global) of the approximation -* `atol`, `rtol`: (same defaults as `isapprox`) tolerances that are passed down to `is_vector` if `check_vector` is set to `true` -* `error`: (`:none`) how to handle errors, possible values: `:error`, `:info`, `:warn` -* `window`: (`nothing`) specify window sizes within the `log_range` that are used for the slope estimation. 
+* $_kw_retraction_method_default: +    $_kw_retraction_method +* `slope_tol=0.1`: +    tolerance for the slope (global) of the approximation +* `atol`, `rtol`: +    (same defaults as `isapprox`) +    tolerances that are passed down to `is_vector` if `check_vector` is set to `true` +* `error=:none`: +    how to handle errors, possible values: `:error`, `:info`, `:warn` +* `window=nothing`: +    specify window sizes within the `log_range` that are used for the slope estimation. the default is, to use all window sizes `2:N`. The remaining keyword arguments are also passed down to the `check_vector` call, such that tolerances can @@ -125,20 +144,24 @@ function check_gradient( return check_differential(M, f, df, p, X; name="gradient", error=error, kwargs...) end -@doc raw""" +_doc_check_Hess_formula = raw""" +```math +f(\operatorname{retr}_p(tX)) = f(p) + t⟨\operatorname{grad} f(p), X⟩ + \frac{t^2}{2}⟨\operatorname{Hess}f(p)[X], X⟩ + \mathcal O(t^3) +``` +""" + +@doc """ check_Hessian(M, f, grad_f, Hess_f, p=rand(M), X=rand(M; vector_at=p), Y=rand(M, vector_at=p); kwargs...) -Verify numerically whether the Hessian ``\operatorname{Hess} f(M,p, X)`` of `f(M,p)` is correct. +Verify numerically whether the Hessian `Hess_f(M,p, X)` of `f(M,p)` is correct. For this either a second-order retraction or a critical point ``p`` of `f` is required. The approximation is then -```math -f(\operatorname{retr}_p(tX)) = f(p) + t⟨\operatorname{grad} f(p), X⟩ + \frac{t^2}{2}⟨\operatorname{Hess}f(p)[X], X⟩ + \mathcal O(t^3) -``` +$_doc_check_Hess_formula or in other words, that the error between the function ``f`` and its second order Taylor -behaves in error ``\mathcal O(t^3)``, which indicates that the Hessian is correct, +behaves in error ``$_l_cO (t^3)``, which indicates that the Hessian is correct, cf. also [Boumal:2023; Section 6.8](@cite). Note that if the errors are below the given tolerance and the method is exact, @@ -146,32 +169,49 @@ no plot is generated. 
 # Keyword arguments

-* `check_grad`: (`true`) verify that ``\operatorname{grad} f(p) ∈ T_p\mathcal M``.
-* `check_linearity`: (`true`) verify that the Hessian is linear, see [`is_Hessian_linear`](@ref) using `a`, `b`, `X`, and `Y`
-* `check_symmetry`: (`true`) verify that the Hessian is symmetric, see [`is_Hessian_symmetric`](@ref)
-* `check_vector`: (`false`) verify that ``\operatorname{Hess} f(p)[X] ∈ T_p\mathcal M`` using `is_vector`.
-* `mode`: (`:Default`) specify the mode for the verification; the default assumption is,
+* `check_grad=true`:
+  verify that ``$_l_grad f(p) ∈ $(_l_TpM())``.
+* `check_linearity=true`:
+  verify that the Hessian is linear, see [`is_Hessian_linear`](@ref) using `a`, `b`, `X`, and `Y`
+* `check_symmetry=true`:
+  verify that the Hessian is symmetric, see [`is_Hessian_symmetric`](@ref)
+* `check_vector=false`:
+  verify that ``$_l_Hess f(p)[X] ∈ $(_l_TpM())`` using `is_vector`.
+* `mode=:Default`:
+  specify the mode for the verification; the default assumption is,
   that the retraction provided is of second order. Otherwise one can also verify the Hessian
   if the point `p` is a critical point. THen set the mode to `:CritalPoint` to use
   [`gradient_descent`](@ref) to find a critical point.
Note: this requires (and evaluates) new tangent vectors `X` and `Y` - * `atol`, `rtol`: (same defaults as `isapprox`) tolerances that are passed down to all checks * `a`, `b` two real values to verify linearity of the Hessian (if `check_linearity=true`) -* `N`: (`101`) number of points to verify within the `log_range` default range ``[10^{-8},10^{0}]`` -* `exactness_tol`: (`1e-12`) if all errors are below this tolerance, the verification is considered to be exact -* `io`: (`nothing`) provide an `IO` to print the result to -* `gradient`: (`grad_f(M, p)`) instead of the gradient function you can also provide the gradient at `p` directly -* `Hessian`: (`Hess_f(M, p, X)`) instead of the Hessian function you can provide the result of ``\operatorname{Hess} f(p)[X]`` directly. +* `N=101`: + number of points to verify within the `log_range` default range ``[10^{-8},10^{0}]`` +* `exactness_tol=1e-12`: + if all errors are below this tolerance, the verification is considered to be exact +* `io=nothing`: + provide an `IO` to print the result to +* `gradient=grad_f(M, p)`: + instead of the gradient function you can also provide the gradient at `p` directly +* `Hessian=Hess_f(M, p, X)`: + instead of the Hessian function you can provide the result of ``$_l_Hess f(p)[X]`` directly. Note that evaluations of the Hessian might still be necessary for checking linearity and symmetry and/or when using `:CriticalPoint` mode. -* `limits`: (`(1e-8,1)`) specify the limits in the `log_range` -* `log_range`: (`range(limits[1], limits[2]; length=N)`) specify the range of points (in log scale) to sample the Hessian line -* `N`: (`101`) number of points to use within the `log_range` default range ``[10^{-8},10^{0}]`` -* `plot`: (`false`) whether to plot the resulting verification (requires `Plots.jl` to be loaded). The plot is in log-log-scale. This is returned and can then also be saved. 
-* `retraction_method`: (`default_retraction_method(M, typeof(p))`) retraction method to use for -* `slope_tol`: (`0.1`) tolerance for the slope (global) of the approximation -* `error`: (`:none`) how to handle errors, possible values: `:error`, `:info`, `:warn` -* `window`: (`nothing`) specify window sizes within the `log_range` that are used for the slope estimation. +* `limits=(1e-8,1)`: + specify the limits in the `log_range` +* `log_range=range(limits[1], limits[2]; length=N)`: + specify the range of points (in log scale) to sample the Hessian line +* `N=101`: + number of points to use within the `log_range` default range ``[10^{-8},10^{0}]`` +* `plot=false`: + whether to plot the resulting verification (requires `Plots.jl` to be loaded). The plot is in log-log-scale. This is returned and can then also be saved. +* $_kw_retraction_method_default: + $_kw_retraction_method +* `slope_tol=0.1`: + tolerance for the slope (global) of the approximation +* `error=:none`: + how to handle errors, possible values: `:error`, `:info`, `:warn` +* `window=nothing`: + specify window sizes within the `log_range` that are used for the slope estimation. the default is, to use all window sizes `2:N`. 
The `kwargs...` are also passed down to the `check_vector` and the `check_gradient` call, such that tolerances can
@@ -302,7 +342,8 @@ which is checked using `isapprox` and the keyword arguments are passed to this f

 # Optional arguments

-* `error`: (`:none`) how to handle errors, possible values: `:error`, `:info`, `:warn`
+* `error=:none`:
+  how to handle errors, possible values: `:error`, `:info`, `:warn`
 """
 function is_Hessian_linear(
@@ -345,7 +386,8 @@ which is checked using `isapprox` and the `kwargs...` are passed to this functio

 # Optional arguments

 * `atol`, `rtol` with the same defaults as the usual `isapprox`
-* `error`: (`:none`) how to handle errors, possible values: `:error`, `:info`, `:warn`
+* `error=:none`:
+  how to handle errors, possible values: `:error`, `:info`, `:warn`
 """
 function is_Hessian_symmetric(
     M,
diff --git a/src/helpers/exports/Asymptote.jl b/src/helpers/exports/Asymptote.jl
index 4279f251a6..d50b0525fb 100644
--- a/src/helpers/exports/Asymptote.jl
+++ b/src/helpers/exports/Asymptote.jl
@@ -1,38 +1,50 @@
 @doc raw"""
-    asymptote_export_S2_signals(filename; points, curves, tangent_vectors, colors, options...)
+    asymptote_export_S2_signals(filename; points, curves, tangent_vectors, colors, kwargs...)
+
 Export given `points`, `curves`, and `tangent_vectors` on the sphere ``\mathbb S^2``
 to Asymptote.

 # Input

 * `filename` a file to store the Asymptote code in.

-# Optional arguments for the data
+# Keyword arguments for the data

-* `colors` dictionary of color arrays (indexed by symbols `:points`, `:curves`
-  and `:tvector`) where each entry has to provide as least as many colors as
-  the length of the corresponding sets.
-* `curves` an `Array` of `Arrays` of points on the sphere, where each inner array is - interpreted as a curve and is accompanied by an entry within `colors` -* `points` an `Array` of `Arrays` of points on the sphere where each inner array is - interpreted as a set of points and is accompanied by an entry within `colors` -* `tangent_vectors` an `Array` of `Arrays` of tuples, where the first is a points, - the second a tangent vector and each set of vectors is accompanied by an entry - from within `colors` +* `colors=Dict{Symbol,Array{RGBA{Float64},1}}()`: dictionary of color arrays, + indexed by symbols `:points`, `:curves` and `:tvector`, where each entry has to provide + as least as many colors as the length of the corresponding sets. +* `curves=Array{Array{Float64,1},1}(undef, 0)`: an `Array` of `Arrays` of points + on the sphere, where each inner array is interpreted as a curve + and is accompanied by an entry within `colors`. +* `points=Array{Array{Float64,1},1}(undef, 0)`: an `Array` of `Arrays` of points + on the sphere where each inner array is interpreted as a set of points and is accompanied + by an entry within `colors`. +* `tangent_vectors=Array{Array{Tuple{Float64,Float64},1},1}(undef, 0)`: + an `Array` of `Arrays` of tuples, where the first is a points, the second a tangent vector + and each set of vectors is accompanied by an entry from within `colors`. -# Optional arguments for asymptote +# Keyword arguments for asymptote -* `arrow_head_size`: (`6.0`) size of the arrowheads of the tangent vectors +* `arrow_head_size=6.0`: + size of the arrowheads of the tangent vectors * `arrow_head_sizes` overrides the previous value to specify a value per `tVector`` set. -* `camera_position`: (`(1., 1., 0.)`) position of the camera in the Asymptote scene -* `line_width`: (`1.0`) size of the lines used to draw the curves. 
+* `camera_position=(1., 1., 0.)`: + position of the camera in the Asymptote scene +* `line_width=1.0`: + size of the lines used to draw the curves. * `line_widths` overrides the previous value to specify a value per curve and `tVector`` set. -* `dot_size`: (`1.0`) size of the dots used to draw the points. +* `dot_size=1.0`: + size of the dots used to draw the points. * `dot_sizes` overrides the previous value to specify a value per point set. -* `size`: (`nothing`) a tuple for the image size, otherwise a relative size `4cm` is used. -* `sphere_color`: (`RGBA{Float64}(0.85, 0.85, 0.85, 0.6)`) color of the sphere the data is drawn on -* `sphere_line_color`: (`RGBA{Float64}(0.75, 0.75, 0.75, 0.6)`) color of the lines on the sphere -* `sphere_line_width`: (`0.5`) line width of the lines on the sphere -* `target`: (`(0.,0.,0.)`) position the camera points at +* `size=nothing`: + a tuple for the image size, otherwise a relative size `4cm` is used. +* `sphere_color=RGBA{Float64}(0.85, 0.85, 0.85, 0.6)`: + color of the sphere the data is drawn on +* `sphere_line_color=RGBA{Float64}(0.75, 0.75, 0.75, 0.6)`: + color of the lines on the sphere +* `sphere_line_width=0.5`: + line width of the lines on the sphere +* `target=(0.,0.,0.)`: + position the camera points at """ function asymptote_export_S2_signals( filename::String; @@ -204,12 +216,14 @@ or three-dimensional data with points on the [Sphere](https://juliamanifolds.git * `data` a point representing the 1D,2D, or 3D array of points * `elevation_color_scheme` A `ColorScheme` for elevation -* `scale_axes`: (`(1/3,1/3,1/3)`) move spheres closer to each other by a factor +* `scale_axes=(1/3,1/3,1/3)`: + move spheres closer to each other by a factor per direction # Optional arguments for asymptote -* `arrow_head_size`: (`1.8`) size of the arrowheads of the vectors (in mm) +* `arrow_head_size=1.8`: + size of the arrowheads of the vectors (in mm) * `camera_position` position of the camera scene (default: atop the center of the 
data in the xy-plane) * `target` position the camera points at (default: center of xy-plane within data). """ @@ -278,7 +292,8 @@ definite matrices. * `data` a point representing the 1D, 2D, or 3D array of SPD matrices * `color_scheme` a `ColorScheme` for Geometric Anisotropy Index -* `scale_axes`: (`(1/3,1/3,1/3)`) move symmetric positive definite matrices +* `scale_axes=(1/3,1/3,1/3)`: + move symmetric positive definite matrices closer to each other by a factor per direction compared to the distance estimated by the maximal eigenvalue of all involved SPD points @@ -374,9 +389,11 @@ be given as a relative or full path the default values are given in brackets -* `render`: (`4`) render level of asymptote passed to its `-render` option. +* `render=4`: + render level of asymptote passed to its `-render` option. This can be removed from the command by setting it to `nothing`. -* `format`: (`"png"`) final rendered format passed to the `-f` option +* `format="png"`: + final rendered format passed to the `-f` option * `export_file`: (the filename with format as ending) specify the export filename """ function render_asymptote( diff --git a/src/plans/Douglas_Rachford_plan.jl b/src/plans/Douglas_Rachford_plan.jl index 1c588678ab..89eb927d9c 100644 --- a/src/plans/Douglas_Rachford_plan.jl +++ b/src/plans/Douglas_Rachford_plan.jl @@ -16,28 +16,26 @@ see also [`reflect`](@ref reflect(M::AbstractManifold, p, x))`(M,p,x)`, to which """ reflect(M::AbstractManifold, pr::Function, x; kwargs...) -@doc raw""" +@doc """ reflect(M, p, x, kwargs...) reflect!(M, q, p, x, kwargs...) Reflect the point `x` from the manifold `M` at point `p`, given by -````math - \operatorname{refl}_p(x) = \operatorname{retr}_p(-\operatorname{retr}^{-1}_p x). -```` +$_math_reflect -where ``\operatorname{retr}`` and ``\operatorname{retr}^{-1}`` denote a retraction and an inverse -retraction, respectively. This can also be done in place of `q`. 
## Keyword arguments -* `retraction_method`: (`default_retraction_metiod(M, typeof(p))`) the retraction to use in the reflection -* `inverse_retraction_method`: (`default_inverse_retraction_method(M, typeof(p))`) the inverse retraction to use within the reflection +* $_kw_retraction_method_default: + $_kw_retraction_method +* $_kw_inverse_retraction_method_default: + $_kw_inverse_retraction_method and for the `reflect!` additionally -* `X`: (`zero_vector(M,p)`) a temporary memory to compute the inverse retraction in place. +* `X=zero_vector(M,p)`: a temporary memory to compute the inverse retraction in place. otherwise this is the memory that would be allocated anyways. """ reflect(M::AbstractManifold, p::Any, x; kwargs...) diff --git a/src/plans/alternating_gradient_plan.jl b/src/plans/alternating_gradient_plan.jl index 4d6c24c87c..0c656ed7cf 100644 --- a/src/plans/alternating_gradient_plan.jl +++ b/src/plans/alternating_gradient_plan.jl @@ -14,6 +14,7 @@ An alternating gradient objective consists of This Objective is usually defined using the `ProductManifold` from `Manifolds.jl`, so `Manifolds.jl` to be loaded. 
# Constructors + ManifoldAlternatingGradientObjective(F, gradF::Function; evaluation=AllocatingEvaluation() ) @@ -119,10 +120,10 @@ function get_gradient( M::AbstractManifold, mago::ManifoldAlternatingGradientObjective{InplaceEvaluation,TC}, p, - k, + i, ) where {TC} - X = zero_vector(M[k], p[M, k]) - get_gradient!(M, X, mago, p, k) + X = zero_vector(M[i], p[M, i]) + get_gradient!(M, X, mago, p, i) return X end function get_gradient!( diff --git a/src/plans/augmented_lagrangian_plan.jl b/src/plans/augmented_lagrangian_plan.jl index e62c1ad60f..8f696b70db 100644 --- a/src/plans/augmented_lagrangian_plan.jl +++ b/src/plans/augmented_lagrangian_plan.jl @@ -1,12 +1,6 @@ -@doc raw""" - AugmentedLagrangianCost{CO,R,T} <: AbstractConstrainedFunctor - -Stores the parameters ``ρ ∈ ℝ``, ``μ ∈ ℝ^m``, ``λ ∈ ℝ^n`` -of the augmented Lagrangian associated to the [`ConstrainedManifoldObjective`](@ref) `co`. - -This struct is also a functor `(M,p) -> v` that can be used as a cost function within a solver, -based on the internal [`ConstrainedManifoldObjective`](@ref) it computes - +#_doc_al_Cost() = "$(_l_cal("L"))_\\rho(p, μ, λ)" +_doc_al_Cost(iter) = "$(_l_cal("L"))_{ρ^{($iter)}}(p, μ^{($iter)}, λ^{($iter)})" +_doc_AL_Cost_long = raw""" ```math \mathcal L_\rho(p, μ, λ) = f(x) + \frac{ρ}{2} \biggl( @@ -15,6 +9,18 @@ based on the internal [`ConstrainedManifoldObjective`](@ref) it computes \sum_{i=1}^m \max\Bigl\{ 0, \frac{μ_i}{ρ} + g_i(p) \Bigr\}^2 \Bigr) ``` +""" + +@doc """ + AugmentedLagrangianCost{CO,R,T} + +Stores the parameters ``ρ ∈ ℝ``, ``μ ∈ ℝ^m``, ``λ ∈ ℝ^n`` +of the augmented Lagrangian associated to the [`ConstrainedManifoldObjective`](@ref) `co`. 
+ +This struct is also a functor `(M,p) -> v` that can be used as a cost function within a solver, +based on the internal [`ConstrainedManifoldObjective`](@ref) it computes + +$_doc_AL_Cost_long ## Fields @@ -63,7 +69,7 @@ additionally this gradient does accept a positional last argument to specify the for the internal gradient call of the constrained objective. based on the internal [`ConstrainedManifoldObjective`](@ref) and computes the gradient -``\operatorname{grad} \mathcal L_{ρ}(p, μ, λ)``, see also [`AugmentedLagrangianCost`](@ref). +`$_l_grad $(_l_cal("L"))_{ρ}(p, μ, λ)``, see also [`AugmentedLagrangianCost`](@ref). ## Fields diff --git a/src/plans/cache.jl b/src/plans/cache.jl index 7597007c50..0b58720637 100644 --- a/src/plans/cache.jl +++ b/src/plans/cache.jl @@ -1,11 +1,11 @@ # # A Simple Cache for Objectives # -@doc raw""" +@doc """ SimpleManifoldCachedObjective{O<:AbstractManifoldGradientObjective{E,TC,TG}, P, T,C} <: AbstractManifoldGradientObjective{E,TC,TG} Provide a simple cache for an [`AbstractManifoldGradientObjective`](@ref) that is for a given point `p` this cache -stores a point `p` and a gradient ``\operatorname{grad} f(p)`` in `X` as well as a cost value ``f(p)`` in `c`. +stores a point `p` and a gradient ``$(_l_grad) f(p)`` in `X` as well as a cost value ``f(p)`` in `c`. Both `X` and `c` are accompanied by booleans to keep track of their validity. @@ -13,11 +13,13 @@ Both `X` and `c` are accompanied by booleans to keep track of their validity. SimpleManifoldCachedObjective(M::AbstractManifold, obj::AbstractManifoldGradientObjective; kwargs...) -## Keyword -* `p`: (`rand(M)`) a point on the manifold to initialize the cache with -* `X`: (`get_gradient(M, obj, p)` or `zero_vector(M,p)`) a tangent vector to store the gradient in, see also `initialize` -* `c`: (`get_cost(M, obj, p)` or `0.0`) a value to store the cost function in `initialize` -* `initialized`: (`true`) whether to initialize the cached `X` and `c` or not. 
+## Keyword arguments + +* `p=`$(_link_rand()): a point on the manifold to initialize the cache with +* `X=get_gradient(M, obj, p)` or `zero_vector(M,p)`: a tangent vector to store the gradient in, + see also `initialize=` +* `c=[`get_cost`](@ref)`(M, obj, p)` or `0.0`: a value to store the cost function in `initialize` +* `initialized=true`: whether to initialize the cached `X` and `c` or not. """ mutable struct SimpleManifoldCachedObjective{ E<:AbstractEvaluationType,O<:AbstractManifoldObjective{E},P,T,C @@ -236,12 +238,18 @@ which function evaluations to cache. # Keyword arguments -* `p`: (`rand(M)`) the type of the keys to be used in the caches. Defaults to the default representation on `M`. -* `value`: (`get_cost(M, objective, p)`) the type of values for numeric values in the cache -* `X`: (`zero_vector(M,p)`) the type of values to be cached for gradient and Hessian calls. -* `cache`: (`[:Cost]`) a vector of symbols indicating which function calls should be cached. -* `cache_size`: (`10`) number of (least recently used) calls to cache -* `cache_sizes`: (`Dict{Symbol,Int}()`) a named tuple or dictionary specifying the sizes individually for each cache. +* `p=rand(M)`: + the type of the keys to be used in the caches. Defaults to the default representation on `M`. +* `value=get_cost(M, objective, p)`: + the type of values for numeric values in the cache +* `X=zero_vector(M,p)`: + the type of values to be cached for gradient and Hessian calls. +* `cache=[:Cost]`: + a vector of symbols indicating which function calls should be cached. +* `cache_size=10`: + number of (least recently used) calls to cache +* `cache_sizes=Dict{Symbol,Int}()`: + a named tuple or dictionary specifying the sizes individually for each cache. 
""" @@ -406,13 +414,13 @@ end # # Constraints function get_equality_constraint( - M::AbstractManifold, co::ManifoldCachedObjective, p, i::Integer + M::AbstractManifold, co::ManifoldCachedObjective, p, j::Integer ) (!haskey(co.cache, :EqualityConstraint)) && - return get_equality_constraint(M, co.objective, p, i) + return get_equality_constraint(M, co.objective, p, j) return copy(# Return a copy of the version in the cache - get!(co.cache[:EqualityConstraint], (copy(M, p), i)) do - get_equality_constraint(M, co.objective, p, i) + get!(co.cache[:EqualityConstraint], (copy(M, p), j)) do + get_equality_constraint(M, co.objective, p, j) end, ) end @@ -570,18 +578,18 @@ function get_grad_equality_constraint!( X, co::ManifoldCachedObjective, p, - i::Integer, + j::Integer, range::Union{AbstractPowerRepresentation,Nothing}=nothing, ) !(haskey(co.cache, :GradEqualityConstraint)) && - return get_grad_equality_constraint!(M, X, co.objective, p, i) + return get_grad_equality_constraint!(M, X, co.objective, p, j) copyto!( M, X, p, - get!(co.cache[:GradEqualityConstraint], (copy(M, p), i)) do + get!(co.cache[:GradEqualityConstraint], (copy(M, p), j)) do # This evaluates in place of X - get_grad_equality_constraint!(M, X, co.objective, p, i) + get_grad_equality_constraint!(M, X, co.objective, p, j) copy(M, p, X) #this creates a copy to be placed in the cache end, #and copy the values back to X ) @@ -740,18 +748,18 @@ function get_grad_inequality_constraint!( X, co::ManifoldCachedObjective, p, - j::Integer, + i::Integer, range::Union{AbstractPowerRepresentation,Nothing}=nothing, ) !(haskey(co.cache, :GradInequalityConstraint)) && - return get_grad_inequality_constraint!(M, X, co.objective, p, j) + return get_grad_inequality_constraint!(M, X, co.objective, p, i) copyto!( M, X, p, - get!(co.cache[:GradInequalityConstraint], (copy(M, p), j)) do + get!(co.cache[:GradInequalityConstraint], (copy(M, p), i)) do # This evaluates in place of X - get_grad_inequality_constraint!(M, X, 
co.objective, p, j) + get_grad_inequality_constraint!(M, X, co.objective, p, i) copy(M, p, X) #this creates a copy to be placed in the cache end, #and copy the values back to X ) diff --git a/src/plans/conjugate_gradient_plan.jl b/src/plans/conjugate_gradient_plan.jl index c1c1fbf918..d8328a69f0 100644 --- a/src/plans/conjugate_gradient_plan.jl +++ b/src/plans/conjugate_gradient_plan.jl @@ -19,7 +19,7 @@ function DirectionUpdateRuleStorage( return DirectionUpdateRuleStorage{typeof(dur),typeof(sa)}(dur, sa) end -@doc raw""" +@doc """ ConjugateGradientState <: AbstractGradientSolverState specify options for a conjugate gradient descent algorithm, that solves a @@ -27,15 +27,15 @@ specify options for a conjugate gradient descent algorithm, that solves a # Fields -* `p`: the current iterate, a point on a manifold -* `X`: the current gradient, also denoted as ``ξ`` or ``X_k`` for the gradient in the ``k``th step. +* $_field_p +* $_field_X * `δ`: the current descent direction, also a tangent vector * `β`: the current update coefficient rule, see . 
-* `coefficient`: ([`ConjugateDescentCoefficient`](@ref)`()`) a [`DirectionUpdateRule`](@ref) function to determine the new `β` -* `stepsize`: ([`default_stepsize`](@ref)`(M, ConjugateGradientDescentState; retraction_method=retraction_method)`) a [`Stepsize`](@ref) function -* `stop`: ([`StopAfterIteration`](@ref)`(500) | `[`StopWhenGradientNormLess`](@ref)`(1e-8)`) a [`StoppingCriterion`](@ref) -* `retraction_method`: (`default_retraction_method(M, typeof(p))`) a type of retraction -* `vector_transport_method`: (`default_retraction_method(M, typeof(p))`) a type of retraction +* `coefficient`: function to determine the new `β` +* $_field_step +* $_field_stop +* $_field_retr +* $_field_vector_transp # Constructor @@ -45,6 +45,15 @@ where the last five fields can be set by their names as keyword and the `X` can be set to a tangent vector type using the keyword `initial_gradient` which defaults to `zero_vector(M,p)`, and `δ` is initialized to a copy of this vector. +## Keyword arguments + +The following fields from above 0 diff --git a/src/plans/constrained_plan.jl b/src/plans/constrained_plan.jl index 65cb7609fa..5ed0a5869e 100644 --- a/src/plans/constrained_plan.jl +++ b/src/plans/constrained_plan.jl @@ -357,7 +357,7 @@ problem is for. hessian_inequality_range=range ) -Creates a constrained Manopt problem specifying an [`AbstractPowerRepresentation`](@extref ManifoldsBase.AbstractPowerRepresentation) +Creates a constrained Manopt problem specifying an [`AbstractPowerRepresentation`](@extref `ManifoldsBase.AbstractPowerRepresentation`) for both the `gradient_equality_range` and the `gradient_inequality_range`, respectively. 
""" struct ConstrainedManoptProblem{ diff --git a/src/plans/count.jl b/src/plans/count.jl index b2e0d0c48e..bfe7172c99 100644 --- a/src/plans/count.jl +++ b/src/plans/count.jl @@ -311,10 +311,10 @@ function get_equality_constraint( return get_equality_constraint(M, co.objective, p, c) end function get_equality_constraint( - M::AbstractManifold, co::ManifoldCountObjective, p, i::Integer + M::AbstractManifold, co::ManifoldCountObjective, p, j::Integer ) - _count_if_exists(co, :EqualityConstraint, i) - return get_equality_constraint(M, co.objective, p, i) + _count_if_exists(co, :EqualityConstraint, j) + return get_equality_constraint(M, co.objective, p, j) end function get_equality_constraint(M::AbstractManifold, co::ManifoldCountObjective, p, i) for j in _to_iterable_indices(1:equality_constraints_length(co.objective), i) @@ -349,10 +349,10 @@ function get_grad_equality_constraint( return get_grad_equality_constraint(M, co.objective, p, i) end function get_grad_equality_constraint( - M::AbstractManifold, co::ManifoldCountObjective, p, i::Integer + M::AbstractManifold, co::ManifoldCountObjective, p, j::Integer ) - _count_if_exists(co, :GradEqualityConstraint, i) - return get_grad_equality_constraint(M, co.objective, p, i) + _count_if_exists(co, :GradEqualityConstraint, j) + return get_grad_equality_constraint(M, co.objective, p, j) end function get_grad_equality_constraint(M::AbstractManifold, co::ManifoldCountObjective, p, i) for j in _to_iterable_indices(1:equality_constraints_length(co.objective), i) @@ -367,10 +367,10 @@ function get_grad_equality_constraint!( return get_grad_equality_constraint!(M, X, co.objective, p, i) end function get_grad_equality_constraint!( - M::AbstractManifold, X, co::ManifoldCountObjective, p, i::Integer + M::AbstractManifold, X, co::ManifoldCountObjective, p, j::Integer ) - _count_if_exists(co, :GradEqualityConstraint, i) - return get_grad_equality_constraint!(M, X, co.objective, p, i) + _count_if_exists(co, :GradEqualityConstraint, j) 
+    return get_grad_equality_constraint!(M, X, co.objective, p, j)
 end
 function get_grad_equality_constraint!(
     M::AbstractManifold, X, co::ManifoldCountObjective, p, i
diff --git a/src/plans/debug.jl b/src/plans/debug.jl
index 340323aad7..cf46b21d1a 100644
--- a/src/plans/debug.jl
+++ b/src/plans/debug.jl
@@ -2,7 +2,7 @@
     DebugAction

 A `DebugAction` is a small functor to print/issue debug output. The usual call is given by
-`(p::AbstractManoptProblem, s::AbstractManoptSolverState, i) -> s`, where `i` is
+`(p::AbstractManoptProblem, s::AbstractManoptSolverState, k) -> s`, where `k` is
 the current iterate.

 By convention `i=0` is interpreted as "For Initialization only," only debug
@@ -123,9 +123,9 @@ the complete string
 mutable struct DebugGroup{D<:DebugAction} <: DebugAction
     group::Vector{D}
 end
-function (d::DebugGroup)(p::AbstractManoptProblem, st::AbstractManoptSolverState, i)
+function (d::DebugGroup)(p::AbstractManoptProblem, st::AbstractManoptSolverState, k)
     for di in d.group
-        di(p, st, i)
+        di(p, st, k)
     end
 end
 function status_summary(dg::DebugGroup)
@@ -150,7 +150,7 @@ end
 @doc raw"""
     DebugEvery <: DebugAction

-evaluate and print debug only every $i$th iteration. Otherwise no print is performed.
+evaluate and print debug only every ``k``th iteration. Otherwise no print is performed.
 Whether internal variables are updates is determined by `always_update`.

 This method does not perform any print itself but relies on it's children's print.
@@ -176,9 +176,9 @@ mutable struct DebugEvery <: DebugAction return new(d, every, always_update, activation_offset) end end -function (d::DebugEvery)(p::AbstractManoptProblem, st::AbstractManoptSolverState, i) - if (rem(i, d.every) == 0) - d.debug(p, st, i) +function (d::DebugEvery)(p::AbstractManoptProblem, st::AbstractManoptSolverState, k) + if (rem(k, d.every) == 0) + d.debug(p, st, k) elseif d.always_update d.debug(p, st, -1) end @@ -188,7 +188,7 @@ function (d::DebugEvery)(p::AbstractManoptProblem, st::AbstractManoptSolverState :SubState, :Debug, :Activity, - !(i < 1) && (rem(i + d.activation_offset, d.every) == 0), + !(k < 1) && (rem(k + d.activation_offset, d.every) == 0), ) return nothing end @@ -219,19 +219,21 @@ end # # Special single ones # -@doc raw""" - DebugChange(M=DefaultManifold()) +@doc """ + DebugChange(M=DefaultManifold(); kwargs...) debug for the amount of change of the iterate (stored in `get_iterate(o)` of the [`AbstractManoptSolverState`](@ref)) during the last iteration. See [`DebugEntryChange`](@ref) for the general case # Keyword parameters -* `storage`: (`StoreStateAction( [:Gradient] )` storage of the previous action -* `prefix`: (`"Last Change:"`) prefix of the debug output (ignored if you set `format`) -* `io`: (`stdout`) default stream to print the debug to. -* `format`: ( `"$prefix %f"`) format to print the output. -* `inverse_retraction_method`: (`default_inverse_retraction_method(M)`) the inverse retraction +* `storage=`[`StoreStateAction`](@ref)`( [:Gradient] )` storage of the previous action +* `prefix="Last Change:"`: prefix of the debug output (ignored if you set `format`) +* `io=stdout`: default stream to print the debug to. +* $_kw_inverse_retraction_method_default: + $_kw_inverse_retraction_method + +the inverse retraction to be used for approximating distance. 
""" mutable struct DebugChange{IR<:AbstractInverseRetractionMethod} <: DebugAction @@ -271,9 +273,9 @@ mutable struct DebugChange{IR<:AbstractInverseRetractionMethod} <: DebugAction return new{typeof(irm)}(io, format, storage, irm) end end -function (d::DebugChange)(mp::AbstractManoptProblem, st::AbstractManoptSolverState, i) +function (d::DebugChange)(mp::AbstractManoptProblem, st::AbstractManoptSolverState, k) M = get_manifold(mp) - (i > 0) && Printf.format( + (k > 0) && Printf.format( d.io, Printf.Format(d.format), distance( @@ -283,7 +285,7 @@ function (d::DebugChange)(mp::AbstractManoptProblem, st::AbstractManoptSolverSta d.inverse_retraction_method, ), ) - d.storage(mp, st, i) + d.storage(mp, st, k) return nothing end function show(io::IO, dc::DebugChange) @@ -294,7 +296,7 @@ function show(io::IO, dc::DebugChange) end status_summary(dc::DebugChange) = "(:Change, \"$(escape_string(dc.format))\")" -@doc raw""" +@doc """ DebugCost <: DebugAction print the current cost function value, see [`get_cost`](@ref). @@ -304,9 +306,9 @@ print the current cost function value, see [`get_cost`](@ref). # Parameters -* `format`: (`"$prefix %f"`) format to print the output -* `io`: (`stdout`) default stream to print the debug to. -* `long`: (`false`) short form to set the format to `f(x):` (default) or `current cost: ` and the cost +* `format="\$prefix %f"`: format to print the output +* `io=stdout`: default stream to print the debug to. 
+* `long=false`: short form to set the format to `f(x):` (default) or `current cost: ` and the cost """ mutable struct DebugCost <: DebugAction io::IO @@ -317,8 +319,8 @@ mutable struct DebugCost <: DebugAction return new(io, format) end end -function (d::DebugCost)(p::AbstractManoptProblem, st::AbstractManoptSolverState, i::Int) - (i >= 0) && Printf.format(d.io, Printf.Format(d.format), get_cost(p, get_iterate(st))) +function (d::DebugCost)(p::AbstractManoptProblem, st::AbstractManoptSolverState, k::Int) + (k >= 0) && Printf.format(d.io, Printf.Format(d.format), get_cost(p, get_iterate(st))) return nothing end function show(io::IO, di::DebugCost) @@ -340,8 +342,8 @@ mutable struct DebugDivider{TIO<:IO} <: DebugAction divider::String DebugDivider(divider=" | "; io::IO=stdout) = new{typeof(io)}(io, divider) end -function (d::DebugDivider)(::AbstractManoptProblem, ::AbstractManoptSolverState, i::Int) - if i >= 0 && !isempty(d.divider) +function (d::DebugDivider)(::AbstractManoptProblem, ::AbstractManoptSolverState, k::Int) + if k >= 0 && !isempty(d.divider) print(d.io, d.divider) end return nothing @@ -374,8 +376,8 @@ mutable struct DebugEntry <: DebugAction return new(io, format, f) end end -function (d::DebugEntry)(::AbstractManoptProblem, st::AbstractManoptSolverState, i) - (i >= 0) && Printf.format(d.io, Printf.Format(d.format), getfield(st, d.field)) +function (d::DebugEntry)(::AbstractManoptProblem, st::AbstractManoptSolverState, k) + (k >= 0) && Printf.format(d.io, Printf.Format(d.format), getfield(st, d.field)) return nothing end function show(io::IO, di::DebugEntry) @@ -494,8 +496,8 @@ mutable struct DebugIfEntry{F} <: DebugAction return new{F}(io, check, f, message, type) end end -function (d::DebugIfEntry)(::AbstractManoptProblem, st::AbstractManoptSolverState, i) - if (i >= 0) && (!d.check(getfield(st, d.field))) +function (d::DebugIfEntry)(::AbstractManoptProblem, st::AbstractManoptSolverState, k) + if (k >= 0) && (!d.check(getfield(st, d.field))) format 
= Printf.Format(d.msg) msg = !('%' ∈ d.msg) ? d.msg : Printf.format(format, getfield(st, d.field)) d.type === :warn && (@warn "$(msg)") @@ -516,9 +518,9 @@ print a certain entries change during iterates # Additional fields -* `print`: (`print`) function to print the result -* `prefix`: (`"Change of :Iterate"`) prefix to the print out -* `format`: (`"$prefix %e"`) format to print (uses the `prefix by default and scientific notation) +* `print`: function to print the result +* `prefix`: prefix to the print out +* `format`: format to print (uses the `prefix` by default and scientific notation) * `field`: Symbol the field can be accessed with within [`AbstractManoptSolverState`](@ref) * `distance`: function (p,o,x1,x2) to compute the change/distance between two values of the entry * `storage`: a [`StoreStateAction`](@ref) to store the previous value of `:f` @@ -527,13 +529,13 @@ print a certain entries change during iterates DebugEntryChange(f,d) -# Keyword arguments +## Keyword arguments -* `io`: (`stdout`) an `IOStream` -* `prefix`: (`"Change of $f"`) -* `storage`: (`StoreStateAction((f,))`) a [`StoreStateAction`](@ref) -* `initial_value`: an initial value for the change of `o.field`. -* `format`: (`"$prefix %e"`) format to print the change +* `io=stdout`: an `IOStream` used for the debug +* `prefix="Change of $f"`: the prefix +* `storage=StoreStateAction((f,))`: a [`StoreStateAction`](@ref) +* `initial_value=NaN`: an initial value for the change of `o.field`. 
+* `format="$prefix %e"`: format to print the change """ mutable struct DebugEntryChange <: DebugAction distance::Any @@ -557,17 +559,17 @@ mutable struct DebugEntryChange <: DebugAction end end function (d::DebugEntryChange)( - p::AbstractManoptProblem, st::AbstractManoptSolverState, i::Int + p::AbstractManoptProblem, st::AbstractManoptSolverState, k::Int ) - if i == 0 + if k == 0 # on init if field not present -> generate - !has_storage(d.storage, d.field) && d.storage(p, st, i) + !has_storage(d.storage, d.field) && d.storage(p, st, k) return nothing end x = get_storage(d.storage, d.field) v = d.distance(p, st, getproperty(st, d.field), x) Printf.format(d.io, Printf.Format(d.format), v) - d.storage(p, st, i) + d.storage(p, st, k) return nothing end function show(io::IO, dec::DebugEntryChange) @@ -585,10 +587,10 @@ during the last iteration. See [`DebugEntryChange`](@ref) for the general case # Keyword parameters -* `storage`: (`StoreStateAction( (:Gradient,) )`) storage of the action for previous data -* `prefix`: (`"Last Change:"`) prefix of the debug output (ignored if you set `format`) -* `io`: (`stdout`) default stream to print the debug to. -* `format`: ( `"$prefix %f"`) format to print the output +* `storage=`[`StoreStateAction`](@ref)`( (:Gradient,) )`: storage of the action for previous data +* `prefix="Last Change:"`: prefix of the debug output (ignored if you set `format`) +* `io=stdout`: default stream to print the debug to.
+* `format="$prefix %f"`: format to print the output """ mutable struct DebugGradientChange{VTR<:AbstractVectorTransportMethod} <: DebugAction io::IO @@ -616,9 +618,9 @@ mutable struct DebugGradientChange{VTR<:AbstractVectorTransportMethod} <: DebugA end end function (d::DebugGradientChange)( - pm::AbstractManoptProblem, st::AbstractManoptSolverState, i + pm::AbstractManoptProblem, st::AbstractManoptSolverState, k ) - if i > 0 + if k > 0 M = get_manifold(pm) p_old = get_storage(d.storage, PointStorageKey(:Iterate)) X_old = get_storage(d.storage, VectorStorageKey(:Gradient)) @@ -629,7 +631,7 @@ function (d::DebugGradientChange)( ) Printf.format(d.io, Printf.Format(d.format), l) end - d.storage(pm, st, i) + d.storage(pm, st, k) return nothing end function show(io::IO, dgc::DebugGradientChange) @@ -648,14 +650,14 @@ end debug for the current iterate (stored in `get_iterate(o)`). # Constructor - DebugIterate() + DebugIterate(; kwargs...) -# Parameters +# Keyword arguments -* `io`: (`stdout`) default stream to print the debug to. -* `format`: (`"$prefix %s"`) format how to print the current iterate -* `long`: (`false`) whether to have a long (`"current iterate:"`) or a short (`"p:"`) prefix -* `prefix` (see `long` for default) set a prefix to be printed before the iterate +* `io=stdout`: default stream to print the debug to. 
+* `format="$prefix %s"`: format how to print the current iterate +* `long=false`: whether to have a long (`"current iterate:"`) or a short (`"p:"`) prefix default +* `prefix`: (see `long` for default) set a prefix to be printed before the iterate """ mutable struct DebugIterate <: DebugAction io::IO @@ -669,8 +671,8 @@ mutable struct DebugIterate <: DebugAction return new(io, format) end end -function (d::DebugIterate)(::AbstractManoptProblem, st::AbstractManoptSolverState, i::Int) - (i > 0) && Printf.format(d.io, Printf.Format(d.format), get_iterate(st)) +function (d::DebugIterate)(::AbstractManoptProblem, st::AbstractManoptSolverState, k::Int) + (k > 0) && Printf.format(d.io, Printf.Format(d.format), get_iterate(st)) return nothing end function show(io::IO, di::DebugIterate) @@ -687,8 +689,8 @@ status_summary(di::DebugIterate) = "(:Iterate, \"$(escape_string(di.format))\")" # Keyword parameters -* `format`: (`"# %-6d"`) format to print the output -* `io`: (`stdout`) default stream to print the debug to. +* `format="# %-6d"`: format to print the output +* `io=stdout`: default stream to print the debug to. 
debug for the current iteration (prefixed with `#` by ) """ @@ -697,9 +699,9 @@ mutable struct DebugIteration <: DebugAction format::String DebugIteration(; io::IO=stdout, format="# %-6d") = new(io, format) end -function (d::DebugIteration)(::AbstractManoptProblem, ::AbstractManoptSolverState, i::Int) - (i == 0) && print(d.io, "Initial ") - (i > 0) && Printf.format(d.io, Printf.Format(d.format), i) +function (d::DebugIteration)(::AbstractManoptProblem, ::AbstractManoptSolverState, k::Int) + (k == 0) && print(d.io, "Initial ") + (k > 0) && Printf.format(d.io, Printf.Format(d.format), k) return nothing end function show(io::IO, di::DebugIteration) @@ -739,10 +741,10 @@ mutable struct DebugMessages <: DebugAction return new(io, mode, warn) end end -function (d::DebugMessages)(::AbstractManoptProblem, st::AbstractManoptSolverState, i::Int) +function (d::DebugMessages)(::AbstractManoptProblem, st::AbstractManoptSolverState, k::Int) if d.status !== :No msg = get_message(st) - (i < 0 || length(msg) == 0) && (return nothing) + (k < 0 || length(msg) == 0) && (return nothing) (d.mode == :Warning) && (@warn msg) (d.mode == :Error) && (@error msg) (d.mode == :Print) && (print(d.io, msg)) @@ -771,8 +773,8 @@ empty, unless the algorithm stops. # Fields -* `prefix`: (`""`) format to print the output -* `io`: (`stdout`) default stream to print the debug to. +* `prefix=""`: format to print the output +* `io=stdout`: default stream to print the debug to. # Constructor @@ -785,9 +787,9 @@ mutable struct DebugStoppingCriterion <: DebugAction DebugStoppingCriterion(prefix=""; io::IO=stdout) = new(io, prefix) end function (d::DebugStoppingCriterion)( - ::AbstractManoptProblem, st::AbstractManoptSolverState, i::Int + ::AbstractManoptProblem, st::AbstractManoptSolverState, k::Int ) - print(d.io, (i > 0) ? "$(d.prefix)$(get_reason(st))" : "") + print(d.io, (k > 0) ? 
"$(d.prefix)$(get_reason(st))" : "") return nothing end function show(io::IO, c::DebugStoppingCriterion) @@ -829,11 +831,11 @@ mutable struct DebugWhenActive{D<:DebugAction} <: DebugAction return new{D}(d, active, always_update) end end -function (dwa::DebugWhenActive)(p::AbstractManoptProblem, st::AbstractManoptSolverState, i) +function (dwa::DebugWhenActive)(p::AbstractManoptProblem, st::AbstractManoptSolverState, k) if dwa.active - dwa.debug(p, st, i) - elseif (i <= 0) && (dwa.always_update) - dwa.debug(p, st, i) + dwa.debug(p, st, k) + elseif (k < 0) && (dwa.always_update) + dwa.debug(p, st, k) end end function show(io::IO, dwa::DebugWhenActive) @@ -860,12 +862,13 @@ The measured time is rounded using the given `time_accuracy` and printed after [ # Keyword parameters -* `io`: (`stdout`) default stream to print the debug to. -* `format`: ( `"$prefix %s"`) format to print the output, where `%s` is the canonicalized time`. -* `mode`: (`:cumulative`) whether to display the total time or reset on every call using `:iterative`. -* `prefix`: (`"Last Change:"`) prefix of the debug output (ignored if you set `format`) -* `start`: (`false`) indicate whether to start the timer on creation or not. Otherwise it might only be started on first call. -* `time_accuracy`: (`Millisecond(1)`) round the time to this period before printing the canonicalized time +* `io=stdout`: default stream to print the debug to. +* `format="$prefix %s"`: format to print the output, where `%s` is the canonicalized time. +* `mode=:cumulative`: whether to display the total time or reset on every call using `:iterative`. +* `prefix="Last Change:"`: prefix of the debug output (ignored if you set `format`) +* `start=false`: indicate whether to start the timer on creation or not. + Otherwise it might only be started on first call.
+* `time_accuracy=Millisecond(1)`: round the time to this period before printing the canonicalized time """ mutable struct DebugTime <: DebugAction io::IO @@ -884,8 +887,8 @@ mutable struct DebugTime <: DebugAction return new(io, format, Nanosecond(start ? time_ns() : 0), time_accuracy, mode) end end -function (d::DebugTime)(::AbstractManoptProblem, ::AbstractManoptSolverState, i) - if i == 0 || d.last_time == Nanosecond(0) # init +function (d::DebugTime)(::AbstractManoptProblem, ::AbstractManoptSolverState, k) + if k == 0 || d.last_time == Nanosecond(0) # init d.last_time = Nanosecond(time_ns()) else t = time_ns() @@ -957,21 +960,21 @@ mutable struct DebugWarnIfCostIncreases <: DebugAction DebugWarnIfCostIncreases(warn::Symbol=:Once; tol=1e-13) = new(warn, Float64(Inf), tol) end function (d::DebugWarnIfCostIncreases)( - p::AbstractManoptProblem, st::AbstractManoptSolverState, i::Int + p::AbstractManoptProblem, st::AbstractManoptSolverState, k::Int ) - (i < 0) && (return nothing) + (k < 0) && (return nothing) if d.status !== :No cost = get_cost(p, get_iterate(st)) if cost > d.old_cost + d.tol @warn """ The cost increased. - At iteration #$i the cost increased from $(d.old_cost) to $(cost). + At iteration #$k the cost increased from $(d.old_cost) to $(cost). """ if st isa GradientDescentState && st.stepsize isa ConstantStepsize @warn """ You seem to be running a `gradient_descent` with a `ConstantStepsize`. Maybe consider to use `ArmijoLinesearch` (if applicable) or use - `ConstantStepsize(value)` with a `value` less than $(get_last_stepsize(p,st,i)). + `ConstantStepsize(value)` with a `value` less than $(get_last_stepsize(p,st,k)). 
""" end if d.status === :Once @@ -1008,13 +1011,13 @@ mutable struct DebugWarnIfCostNotFinite <: DebugAction DebugWarnIfCostNotFinite(warn::Symbol=:Once) = new(warn) end function (d::DebugWarnIfCostNotFinite)( - p::AbstractManoptProblem, st::AbstractManoptSolverState, i::Int + p::AbstractManoptProblem, st::AbstractManoptSolverState, k::Int ) if d.status !== :No cost = get_cost(p, get_iterate(st)) if !isfinite(cost) @warn """The cost is not finite. - At iteration #$i the cost evaluated to $(cost). + At iteration #$k the cost evaluated to $(cost). """ if d.status === :Once @warn "Further warnings will be suppressed, use DebugWarnIfCostNotFinite(:Always) to get all warnings." @@ -1054,7 +1057,7 @@ mutable struct DebugWarnIfFieldNotFinite <: DebugAction end end function (d::DebugWarnIfFieldNotFinite)( - ::AbstractManoptProblem, st::AbstractManoptSolverState, i::Int + ::AbstractManoptProblem, st::AbstractManoptSolverState, k::Int ) if d.status !== :No if d.field == :Iterate @@ -1070,7 +1073,7 @@ function (d::DebugWarnIfFieldNotFinite)( if !all(isfinite.(v)) @warn """ $s is or contains values that are not finite. - At iteration #$i it evaluated to $(v). + At iteration #$k it evaluated to $(v). """ if d.status === :Once @warn "Further warnings will be suppressed, use DebugWaranIfFieldNotFinite(:$(d.field), :Always) to get all warnings." @@ -1113,7 +1116,7 @@ mutable struct DebugWarnIfGradientNormTooLarge{T} <: DebugAction end end function (d::DebugWarnIfGradientNormTooLarge)( - mp::AbstractManoptProblem, st::AbstractManoptSolverState, i::Int + mp::AbstractManoptProblem, st::AbstractManoptSolverState, k::Int ) if d.status !== :No M = get_manifold(mp) @@ -1122,7 +1125,7 @@ function (d::DebugWarnIfGradientNormTooLarge)( Xn = norm(M, p, X) p_inj = d.factor * max_stepsize(M, p) if Xn > p_inj - @warn """At iteration #$i + @warn """At iteration #$k the gradient norm ($Xn) is larger that $(d.factor) times the injectivity radius $(p_inj) at the current iterate. 
""" if d.status === :Once diff --git a/src/plans/difference_of_convex_plan.jl b/src/plans/difference_of_convex_plan.jl index 107a975b84..84adff0898 100644 --- a/src/plans/difference_of_convex_plan.jl +++ b/src/plans/difference_of_convex_plan.jl @@ -225,7 +225,7 @@ where both ``g`` and ``h`` are convex, lower semicontinuous and proper. # Fields -* `cost`: (`nothing`) implementation of ``f(p) = g(p)-h(p)`` (optional) +* `cost`: implementation of ``f(p) = g(p)-h(p)`` * `gradient`: the gradient of the cost * `grad_h!!`: a function ``\operatorname{grad}h: \mathcal M → T\mathcal M``, diff --git a/src/plans/docstring_snippets.jl b/src/plans/docstring_snippets.jl new file mode 100644 index 0000000000..7a5cc829bc --- /dev/null +++ b/src/plans/docstring_snippets.jl @@ -0,0 +1,167 @@ +# +# +# This file collects a few strings to be reused in documentation to avoid retyping everything + +# LaTeX symbols +_l_cal(letter::String) = raw"\mathcal " * "$letter" +_l_cO = raw"\mathcal O" +_l_ds = raw"\displaystyle" +_l_argmin = raw"\operatorname{arg\,min}" +_l_frac(a, b) = raw"\frac" * "{$a}{$b}" +_l_grad = raw"\operatorname{grad}" +_l_Hess = raw"\operatorname{Hess}" +_l_log = raw"\log" +_l_prox = raw"\operatorname{prox}" +_l_refl = raw"\operatorname{refl}_p(x) = \operatorname{retr}_p(-\operatorname{retr}^{-1}_p x)" +_l_subgrad = raw"∂" +_l_min = raw"\min" +_l_max = raw"\max" +_l_norm(v, i="") = raw"\lVert" * "$v" * raw"\rVert" * "_{$i}" +# Semantics +_l_Manifold(M="M") = _l_cal(M) +_l_M = "$(_l_Manifold())" +_l_TpM(p="p") = "T_{$p}$_l_M" +_l_DΛ = "DΛ: T_{m}$(_l_M) → T_{Λ(m)}$(_l_Manifold("N"))" +_l_grad_long = raw"\operatorname{grad} f: \mathcal M → T\mathcal M" +_l_Hess_long = "$_l_Hess f(p)[⋅]: $(_l_TpM()) → $(_l_TpM())" +_l_retr = raw"\operatorname{retr}" +_l_retr_long = raw"\operatorname{retr}: T\mathcal M \to \mathcal M" +_l_vt = raw"\mathcal T_{\cdot\gets\cdot}" +_l_C_subset_M = "$(_l_cal("C")) ⊂ $(_l_cal("M"))" +_l_txt(s) = "\\text{$s}" + +# Math terms +_math_VT = raw"a vector
transport ``T``" +_math_inv_retr = "an inverse retraction ``$_l_retr^{-1}``" +_math_retr = " a retraction $_l_retr" +_math_reflect = raw""" +```math + \operatorname{refl}_p(x) = \operatorname{retr}_p(-\operatorname{retr}^{-1}_p x), +``` +where ``\operatorname{retr}`` and ``\operatorname{retr}^{-1}`` denote a retraction and an inverse +retraction, respectively. +""" +function _math_sequence(name, index, i_start=1, i_end="n") + return "\\{$(name)_{$index}\\}_{i=$(i_start)}^{$i_end}" +end + +_problem_default = raw""" +```math +\operatorname*{arg\,min}_{p ∈ \mathcal M} f(p) +``` +""" + +_problem_constrained = raw"""```math +\begin{aligned} +\min_{p ∈\mathcal{M}} &f(p)\\ +\text{subject to } &g_i(p)\leq 0 \quad \text{ for } i= 1, …, m,\\ +\quad &h_j(p)=0 \quad \text{ for } j=1,…,n, +\end{aligned} +``` +""" + +# Arguments of functions +_arg_alt_mgo = raw""" +Alternatively to `f` and `grad_f` you can provide +the [`AbstractManifoldGradientObjective`](@ref) `gradient_objective` directly. +""" + +# Arguments +_arg_f = raw"* `f`: a cost function ``f: \mathcal M→ℝ`` implemented as `(M, p) -> v`" +_arg_grad_f = raw""" +* `grad_f`: the gradient ``\operatorname{grad}f: \mathcal M → T\mathcal M`` of f + as a function `(M, p) -> X` or a function `(M, X, p) -> X` computing `X` in-place +""" +_arg_Hess_f = """ +* `Hess_f`: the Hessian ``$_l_Hess_long`` of f + as a function `(M, p, X) -> Y` or a function `(M, Y, p, X) -> Y` computing `Y` in-place +""" +_arg_p = raw"* `p` an initial value `p` ``= p^{(0)} ∈ \mathcal M``" +_arg_M = "* `M` a manifold ``$_l_M``" +_arg_inline_M = "the manifold `M`" +_arg_X = "* `X` a tangent vector" +_arg_sub_problem = "* `sub_problem` a [`AbstractManoptProblem`](@ref) to specify a problem for a solver or a closed form solution function." +_arg_sub_state = "* `sub_state` a [`AbstractManoptSolverState`](@ref) for the `sub_problem` or a [`AbstractEvaluationType`](@ref) if a closed form solution is provided." 
+_arg_subgrad_f = raw""" +* `∂f`: the subgradient ``∂f: \mathcal M → T\mathcal M`` of f + as a function `(M, p) -> X` or a function `(M, X, p) -> X` computing `X` in-place. + This function should always only return one element from the subgradient. +""" + +_doc_remark_tutorial_debug = "If you activate tutorial mode (cf. [`is_tutorial_mode`](@ref)), this solver provides additional debug warnings." +_doc_sec_output = """ +# Output + +The obtained approximate minimizer ``p^*``. +To obtain the whole final state of the solver, see [`get_solver_return`](@ref) for details, especially the `return_state=` keyword. +""" + +_sc_any = "[` | `](@ref StopWhenAny)" +_sc_all = "[` & `](@ref StopWhenAll)" + +# Fields +_field_at_iteration = "`at_iteration`: an integer indicating at which iteration the stopping criterion last indicated to stop, which might also be before the solver started (`0`). Any negative value indicates that this was not yet the case; " +_field_iterate = "`p`: the current iterate ``p=p^{(k)} ∈ $(_l_M)``" +_field_gradient = "`X`: the current gradient ``$(_l_grad)f(p^{(k)}) ∈ T_p$(_l_M)``" +_field_subgradient = "`X` : the current subgradient ``$(_l_subgrad)f(p^{(k)}) ∈ T_p$_l_M``" +_field_inv_retr = "`inverse_retraction_method::`[`AbstractInverseRetractionMethod`](@extref `ManifoldsBase.AbstractInverseRetractionMethod`) : an inverse retraction ``$(_l_retr)^{-1}``" +_field_p = raw"`p`, an initial value `p` ``= p^{(0)} ∈ \mathcal M``" +_field_retr = "`retraction_method::`[`AbstractRetractionMethod`](@extref `ManifoldsBase.AbstractRetractionMethod`) : a retraction ``$(_l_retr_long)``" +_field_sub_problem = "`sub_problem::Union{`[`AbstractManoptProblem`](@ref)`, F}`: a manopt problem or a function for a closed form solution of the sub problem" +_field_sub_state = "`sub_state::Union{`[`AbstractManoptSolverState`](@ref)`,`[`AbstractEvaluationType`](@ref)`}`: for a sub problem state which solver to use, for the closed form solution function, indicate, whether the closed form
solution function works with [`AllocatingEvaluation`](@ref)) `(M, p, X) -> q` or with an [`InplaceEvaluation`](@ref)) `(M, q, p, X) -> q`" +_field_stop = "`stop::`[`StoppingCriterion`](@ref) : a functor indicating when to stop and whether the algorithm has stopped" +_field_step = "`stepsize::`[`Stepsize`](@ref) : a stepsize." +_field_vector_transp = "`vector_transport_method::`[`AbstractVectorTransportMethod`](@extref `ManifoldsBase.AbstractVectorTransportMethod`) : a vector transport ``$_l_vt``" +_field_X = "`X`: a tangent vector" + +# Keywords +_kw_evaluation_default = "`evaluation=`[`AllocatingEvaluation`](@ref)`()`" +_kw_evaluation = "specify whether the functions that return an array, for example a point or a tangent vector, work by allocating its result ([`AllocatingEvaluation`](@ref)) or whether they modify their input argument to return the result therein ([`InplaceEvaluation`](@ref)). Since usually the first argument is the manifold, the modified argument is the second." +_kw_evaluation_example = "For example `grad_f(M,p)` allocates, but `grad_f!(M, X, p)` computes the result in-place of `X`." + +_kw_inverse_retraction_method_default = "`inverse_retraction_method=`[`default_inverse_retraction_method`](@extref `ManifoldsBase.default_inverse_retraction_method-Tuple{AbstractManifold}`)`(M, typeof(p))`" +_kw_inverse_retraction_method = "an inverse retraction ``$(_l_retr)^{-1}`` to use, see [the section on retractions and their inverses](@extref ManifoldsBase :doc:`retractions`)." + +_kw_others = raw""" +All other keyword arguments are passed to [`decorate_state!`](@ref) for state decorators or +[`decorate_objective!`](@ref) for objective, respectively. 
+""" + +_kw_retraction_method_default = raw"`retraction_method=`[`default_retraction_method`](@extref `ManifoldsBase.default_retraction_method-Tuple{AbstractManifold}`)`(M, typeof(p))`" +_kw_retraction_method = "a retraction ``$(_l_retr)`` to use, see [the section on retractions](@extref ManifoldsBase :doc:`retractions`)." + +_kw_stepsize = raw"a functor inheriting from [`Stepsize`](@ref) to determine a step size" + +_kw_stopping_criterion = raw"a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop." +_kw_stop_note = "is used to set the field `stop`." + +_kw_sub_kwargs_default = "`sub_kwargs=(;)`" +_kw_sub_kwargs = "a named tuple of keyword arguments that are passed to [`decorate_objective!`](@ref) of the sub solver's objective, the [`decorate_state!`](@ref) of the sub solver's state, and the sub state constructor itself." + +_kw_sub_objective = "a shortcut to modify the objective of the subproblem used within the `sub_problem=` keyword" +function _kw_sub_objective_default_text(type::String) + return "By default, this is initialized as a [`$type`](@ref), which can further be decorated by using the `sub_kwargs=` keyword" +end + +_kw_vector_transport_method_default = "`vector_transport_method=`[`default_vector_transport_method`](@extref `ManifoldsBase.default_vector_transport_method-Tuple{AbstractManifold}`)`(M, typeof(p))`" +_kw_vector_transport_method = "a vector transport ``$_l_vt`` to use, see [the section on vector transports](@extref ManifoldsBase :doc:`vector_transports`)." + +_kw_X_default = raw"`X=`[`zero_vector`](@extref `ManifoldsBase.zero_vector-Tuple{AbstractManifold, Any}`)`(M,p)`" +_kw_X = raw"specify a memory internally to store a tangent vector" + +function _kw_used_in(s::String) + return "This is used to define the `$s=` keyword and has hence no effect, if you set `$s` directly." +end + +function _link_zero_vector(M="M", p="p") + arg = length(M) > 0 ?
"`($M, $p)`" : "" + return "[`zero_vector`](@extref `ManifoldsBase.zero_vector-Tuple{AbstractManifold, Any}`)$arg" +end +function _link_manifold_dimension(M="M") + arg = length(M) > 0 ? "`($M)`" : "" + return "[`manifold_dimension`](@extref `ManifoldsBase.manifold_dimension-Tuple{AbstractManifold}`)$arg" +end +function _link_rand(M="M") + arg = length(M) > 0 ? "`($M)`" : "" + return "[`rand`](@extref Base.rand-Tuple{AbstractManifold})$arg" +end diff --git a/src/plans/embedded_objective.jl b/src/plans/embedded_objective.jl index b75895c618..9799a5085d 100644 --- a/src/plans/embedded_objective.jl +++ b/src/plans/embedded_objective.jl @@ -10,8 +10,8 @@ The types can be used to still dispatch on also the undecorated objective type ` # Fields * `objective`: the objective that is defined in the embedding -* `p`: (`nothing`) a point in the embedding. -* `X`: (`nothing`) a tangent vector in the embedding +* `p=nothing`: a point in the embedding. +* `X=nothing`: a tangent vector in the embedding When a point in the embedding `p` is provided, `embed!` is used in place of this point to reduce memory allocations. Similarly `X` is used when embedding tangent vectors @@ -312,10 +312,10 @@ The returned gradient is then converted to a Riemannian gradient calling [`riemannian_gradient`](https://juliamanifolds.github.io/ManifoldDiff.jl/stable/library.html#ManifoldDiff.riemannian_gradient-Tuple{AbstractManifold,%20Any,%20Any}). 
""" function get_grad_inequality_constraint( - M::AbstractManifold, emo::EmbeddedManifoldObjective{P,Missing}, p, j::Integer + M::AbstractManifold, emo::EmbeddedManifoldObjective{P,Missing}, p, i::Integer ) where {P} q = local_embed!(M, emo, p) - Z = get_grad_inequality_constraint(get_embedding(M), emo.objective, q, j) + Z = get_grad_inequality_constraint(get_embedding(M), emo.objective, q, i) return riemannian_gradient(M, p, Z) end function get_grad_inequality_constraint( @@ -326,10 +326,10 @@ function get_grad_inequality_constraint( return [riemannian_gradient(M, p, X) for X in Z] end function get_grad_inequality_constraint( - M::AbstractManifold, emo::EmbeddedManifoldObjective{P,T}, p, j::Integer + M::AbstractManifold, emo::EmbeddedManifoldObjective{P,T}, p, i::Integer ) where {P,T} q = local_embed!(M, emo, p) - get_grad_inequality_constraint!(get_embedding(M), emo.X, emo.objective, q, j) + get_grad_inequality_constraint!(get_embedding(M), emo.X, emo.objective, q, i) return riemannian_gradient(M, p, emo.X) end function get_grad_inequality_constraint( @@ -340,10 +340,10 @@ function get_grad_inequality_constraint( return [riemannian_gradient(M, p, X) for X in Z] end function get_grad_inequality_constraint!( - M::AbstractManifold, Y, emo::EmbeddedManifoldObjective{P,Missing}, p, j::Integer + M::AbstractManifold, Y, emo::EmbeddedManifoldObjective{P,Missing}, p, i::Integer ) where {P} q = local_embed!(M, emo, p) - Z = get_grad_inequality_constraint(get_embedding(M), emo.objective, q, j) + Z = get_grad_inequality_constraint(get_embedding(M), emo.objective, q, i) riemannian_gradient!(M, Y, p, Z) return Y end @@ -356,10 +356,10 @@ function get_grad_inequality_constraint!( return Y end function get_grad_inequality_constraint!( - M::AbstractManifold, Y, emo::EmbeddedManifoldObjective{P,T}, p, j::Integer + M::AbstractManifold, Y, emo::EmbeddedManifoldObjective{P,T}, p, i::Integer ) where {P,T} q = local_embed!(M, emo, p) - get_grad_inequality_constraint!(get_embedding(M), 
emo.X, emo.objective, q, j) + get_grad_inequality_constraint!(get_embedding(M), emo.X, emo.objective, q, i) riemannian_gradient!(M, Y, p, emo.X) return Y end diff --git a/src/plans/gradient_plan.jl b/src/plans/gradient_plan.jl index e629f1948a..d4ad0163ea 100644 --- a/src/plans/gradient_plan.jl +++ b/src/plans/gradient_plan.jl @@ -292,12 +292,13 @@ where ``sd_i`` is the current (inner) direction and ``η_{i-1}'`` is the vector last direction multiplied by momentum ``m``. # Fields -* `p_old`: (`rand(M)`) remember the last iterate for parallel transporting the last direction -* `momentum`: (`0.2`) factor for momentum -* `direction`: internal [`DirectionUpdateRule`](@ref) to determine directions + +* `p_old`: remember the last iterate for parallel transporting the last direction +* `momentum`: factor for momentum +* `direction`: internal [`DirectionUpdateRule`](@ref) to determine directions to add the momentum to. -* `vector_transport_method`: (`default_vector_transport_method(M, typeof(p))`) vector transport method to use -* `X_old`: (`zero_vector(M,x0)`) the last gradient/direction update added as momentum +* `vector_transport_method`: vector transport method to use +* `X_old`: the last gradient/direction update added as momentum # Constructors @@ -307,7 +308,8 @@ Add momentum to a gradient problem, where by default just a gradient evaluation M::AbstractManifold; p=rand(M), s::DirectionUpdateRule=IdentityUpdateRule(); - X=zero_vector(p.M, x0), momentum=0.2 + X=zero_vector(M, p), + momentum=0.2 vector_transport_method=default_vector_transport_method(M, typeof(p)), ) @@ -334,11 +336,11 @@ function MomentumGradient( ) end function (mg::MomentumGradient)( - mp::AbstractManoptProblem, s::AbstractGradientSolverState, i + mp::AbstractManoptProblem, s::AbstractGradientSolverState, k ) M = get_manifold(mp) p = get_iterate(s) - step, dir = mg.direction(mp, s, i) #get inner direction and step size + step, dir = mg.direction(mp, s, k) #get inner direction and step size mg.X_old 
= mg.momentum * vector_transport_to(M, mg.p_old, mg.X_old, p, mg.vector_transport_method) - @@ -398,10 +400,10 @@ function AverageGradient( gradients, p, direction, vector_transport_method ) end -function (a::AverageGradient)(mp::AbstractManoptProblem, s::AbstractGradientSolverState, i) +function (a::AverageGradient)(mp::AbstractManoptProblem, s::AbstractGradientSolverState, k) pop!(a.gradients) M = get_manifold(mp) - step, d = a.direction(mp, s, i) #get inner gradient and step + step, d = a.direction(mp, s, k) #get inner gradient and step a.gradients = vcat([deepcopy(d)], a.gradients) for i in 1:(length(a.gradients) - 1) #transport & shift in place vector_transport_to!( @@ -471,9 +473,9 @@ function Nesterov( ) where {P,T} return Nesterov{P,T}(γ, μ, copy(M, p), shrinkage, inverse_retraction_method) end -function (n::Nesterov)(mp::AbstractManoptProblem, s::AbstractGradientSolverState, i) +function (n::Nesterov)(mp::AbstractManoptProblem, s::AbstractGradientSolverState, k) M = get_manifold(mp) - h = get_stepsize(mp, s, i) + h = get_stepsize(mp, s, k) p = get_iterate(s) α = (h * (n.γ - n.μ) + sqrt(h^2 * (n.γ - n.μ)^2 + 4 * h * n.γ)) / 2 γbar = (1 - α) * n.γ + α * n.μ @@ -489,7 +491,7 @@ function (n::Nesterov)(mp::AbstractManoptProblem, s::AbstractGradientSolverState (((1 - α) * n.γ) / γbar) * inverse_retract(M, y, n.v, n.inverse_retraction_method) - (α / γbar) * gradf_yk n.v = retract(M, y, d, s.retraction_method) - n.γ = 1 / (1 + n.shrinkage(i)) * γbar + n.γ = 1 / (1 + n.shrinkage(k)) * γbar return h, (-1 / h) * inverse_retract(M, p, xn, n.inverse_retraction_method) # outer update end @@ -516,8 +518,8 @@ mutable struct DebugGradient <: DebugAction return new(io, format) end end -function (d::DebugGradient)(::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int) - (i < 1) && return nothing +function (d::DebugGradient)(::AbstractManoptProblem, s::AbstractManoptSolverState, k::Int) + (k < 1) && return nothing Printf.format(d.io, Printf.Format(d.format), 
get_gradient(s)) return nothing end @@ -553,9 +555,9 @@ mutable struct DebugGradientNorm <: DebugAction end end function (d::DebugGradientNorm)( - mp::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int + mp::AbstractManoptProblem, s::AbstractManoptSolverState, k::Int ) - (i < 1) && return nothing + (k < 1) && return nothing Printf.format( d.io, Printf.Format(d.format), @@ -591,10 +593,10 @@ mutable struct DebugStepsize <: DebugAction end end function (d::DebugStepsize)( - p::P, s::O, i::Int + p::P, s::O, k::Int ) where {P<:AbstractManoptProblem,O<:AbstractGradientSolverState} - (i < 1) && return nothing - Printf.format(d.io, Printf.Format(d.format), get_last_stepsize(p, s, i)) + (k < 1) && return nothing + Printf.format(d.io, Printf.Format(d.format), get_last_stepsize(p, s, k)) return nothing end function show(io::IO, ds::DebugStepsize) @@ -620,9 +622,9 @@ mutable struct RecordGradient{T} <: RecordAction end RecordGradient(ξ::T) where {T} = RecordGradient{T}() function (r::RecordGradient{T})( - ::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int + ::AbstractManoptProblem, s::AbstractManoptSolverState, k::Int ) where {T} - return record_or_reset!(r, get_gradient(s), i) + return record_or_reset!(r, get_gradient(s), k) end show(io::IO, ::RecordGradient{T}) where {T} = print(io, "RecordGradient{$T}()") @@ -636,10 +638,10 @@ mutable struct RecordGradientNorm <: RecordAction RecordGradientNorm() = new(Array{Float64,1}()) end function (r::RecordGradientNorm)( - mp::AbstractManoptProblem, ast::AbstractManoptSolverState, i::Int + mp::AbstractManoptProblem, ast::AbstractManoptSolverState, k::Int ) M = get_manifold(mp) - return record_or_reset!(r, norm(M, get_iterate(ast), get_gradient(ast)), i) + return record_or_reset!(r, norm(M, get_iterate(ast), get_gradient(ast)), k) end show(io::IO, ::RecordGradientNorm) = print(io, "RecordGradientNorm()") @@ -652,6 +654,6 @@ mutable struct RecordStepsize <: RecordAction recorded_values::Array{Float64,1} 
RecordStepsize() = new(Array{Float64,1}()) end -function (r::RecordStepsize)(p::AbstractManoptProblem, s::AbstractGradientSolverState, i) - return record_or_reset!(r, get_last_stepsize(p, s, i), i) +function (r::RecordStepsize)(p::AbstractManoptProblem, s::AbstractGradientSolverState, k) + return record_or_reset!(r, get_last_stepsize(p, s, k), k) end diff --git a/src/plans/hessian_plan.jl b/src/plans/hessian_plan.jl index 3a7fda7656..1cd1861cba 100644 --- a/src/plans/hessian_plan.jl +++ b/src/plans/hessian_plan.jl @@ -201,23 +201,29 @@ An abstract supertype for approximate Hessian functions, declares them also to b """ abstract type AbstractApproxHessian <: Function end -@doc raw""" +_doc_ApproxHessian_formula = raw""" +```math +\operatorname{Hess}f(p)[X] ≈ +\frac{\lVert X \rVert_p}{c}\Bigl( + \mathcal T_{p\gets q}\bigr(\operatorname{grad}f(q)\bigl) - \operatorname{grad}f(p) +\Bigl) +``` +""" +_doc_ApproxHessian_step = raw"\operatorname{retr}_p(\frac{c}{\lVert X \rVert_p}X)" + +@doc """ ApproxHessianFiniteDifference{E, P, T, G, RTR, VTR, R <: Real} <: AbstractApproxHessian A functor to approximate the Hessian by a finite difference of gradient evaluation. -Given a point `p` and a direction `X` and the gradient ``\operatorname{grad}F: \mathcal M → T\mathcal M`` -of a function ``F`` the Hessian is approximated as follows: -let ``c`` be a stepsize, ``X∈ T_p\mathcal M`` a tangent vector and ``q = \operatorname{retr}_p(\frac{c}{\lVert X \rVert_p}X)`` +Given a point `p` and a direction `X` and the gradient ``$(_l_grad) f(p)`` +of a function ``f`` the Hessian is approximated as follows: +let ``c`` be a stepsize, ``X ∈ $(_l_TpM())`` a tangent vector and ``q = $_doc_ApproxHessian_step`` be a step in direction ``X`` of length ``c`` following a retraction -Then the Hessian is approximated by the finite difference of the gradients, where ``\mathcal T_{\cdot\gets\cdot}`` is a vector transport. 
+Then the Hessian is approximated by the finite difference of the gradients, +where ``$_l_vt`` is a vector transport. -```math -\operatorname{Hess}F(p)[X] ≈ -\frac{\lVert X \rVert_p}{c}\Bigl( - \mathcal T_{p\gets q}\bigr(\operatorname{grad}F(q)\bigl) - \operatorname{grad}F(p) -\Bigl) -``` +$_doc_ApproxHessian_formula # Fields @@ -238,10 +244,12 @@ Then the Hessian is approximated by the finite difference of the gradients, wher ## Keyword arguments -* `evaluation`: ([`AllocatingEvaluation`](@ref)) whether the gradient is given as an allocation function or an in-place ([`InplaceEvaluation`](@ref)). -* `steplength`: (``2^{-14}``) step length ``c`` to approximate the gradient evaluations -* `retraction_method`: (`default_retraction_method(M, typeof(p))`) a `retraction(M, p, X)` to use in the approximation. -* `vector_transport_method`: (`default_vector_transport_method(M, typeof(p))`) a vector transport to use +* `evaluation=`[`AllocatingEvaluation`](@ref): whether the gradient is given as an allocation function or an in-place ([`InplaceEvaluation`](@ref)). +* `steplength=2^{-14}`: step length ``c`` to approximate the gradient evaluations +* $_kw_retraction_method_default + $_kw_retraction_method +* $_kw_vector_transport_method_default + $_kw_vector_transport_method """ mutable struct ApproxHessianFiniteDifference{E,P,T,G,RTR,VTR,R<:Real} <: AbstractApproxHessian @@ -307,18 +315,26 @@ end ApproxHessianSymmetricRankOne{E, P, G, T, B<:AbstractBasis{ℝ}, VTR, R<:Real} <: AbstractApproxHessian A functor to approximate the Hessian by the symmetric rank one update. + # Fields -* `gradient!!` the gradient function (either allocating or mutating, see `evaluation` parameter). -* `ν` a small real number to ensure that the denominator in the update does not become too small and thus the method does not break down. -* `vector_transport_method` a vector transport to use. + +* `gradient!!`: the gradient function (either allocating or mutating, see `evaluation` parameter). 
+* `ν`: a small real number to ensure that the denominator in the update does not become too small and thus the method does not break down. +* `vector_transport_method`: a vector transport to use. + ## Internal temporary fields -* `p_tmp` a temporary storage the current point `p`. -* `grad_tmp` a temporary storage for the gradient at the current `p`. -* `matrix` a temporary storage for the matrix representation of the approximating operator. -* `basis` a temporary storage for an orthonormal basis at the current `p`. + +* `p_tmp`: a temporary storage the current point `p`. +* `grad_tmp`: a temporary storage for the gradient at the current `p`. +* `matrix`: a temporary storage for the matrix representation of the approximating operator. +* `basis`: a temporary storage for an orthonormal basis at the current `p`. + # Constructor + ApproxHessianSymmetricRankOne(M, p, gradF; kwargs...) + ## Keyword arguments + * `initial_operator` (`Matrix{Float64}(I, manifold_dimension(M), manifold_dimension(M))`) the matrix representation of the initial approximating operator. * `basis` (`DefaultOrthonormalBasis()`) an orthonormal basis in the tangent space of the initial iterate p. 
* `nu` (`-1`) diff --git a/src/plans/higher_order_primal_dual_plan.jl b/src/plans/higher_order_primal_dual_plan.jl index 1aee1e2021..2acceac7e0 100644 --- a/src/plans/higher_order_primal_dual_plan.jl +++ b/src/plans/higher_order_primal_dual_plan.jl @@ -63,22 +63,24 @@ function PrimalDualManifoldSemismoothNewtonObjective( ) end -@doc raw""" +@doc """ PrimalDualSemismoothNewtonState <: AbstractPrimalDualSolverState -* `m`: base point on ``\mathcal M`` -* `n`: base point on ``\mathcal N`` -* `x`: an initial point on ``x^{(0)} ∈ \mathcal M`` (and its previous iterate) -* `ξ`: an initial tangent vector ``\xi^{(0)} ∈ T_{n}^*\mathcal N`` (and its previous iterate) -* `primal_stepsize`: (`1/sqrt(8)`) proximal parameter of the primal prox -* `dual_stepsize`: (`1/sqrt(8)`) proximal parameter of the dual prox -* `reg_param`: (`1e-5`) regularisation parameter for the Newton matrix -* `stop`: a [`StoppingCriterion`](@ref) -* `update_primal_base`: (`( amp, ams, i) -> o.m`) function to update the primal base -* `update_dual_base`: (`(amp, ams, i) -> o.n`) function to update the dual base -* `retraction_method`: (`default_retraction_method(M, typeof(p))`) the retraction to use -* `inverse_retraction_method`: (`default_inverse_retraction_method(M, typeof(p))`) an inverse retraction to use. 
-* `vector_transport_method`: (`default_vector_transport_method(M, typeof(p))`) a vector transport to use +# Fields + +* `m`: base point on ``$_l_M`` +* `n`: base point on ``$(_l_Manifold("N"))`` +* `x`: an initial point on ``x^{(0)} ∈ $_l_M`` (and its previous iterate) +* `ξ`: an initial tangent vector ``\\xi^{(0)} ∈ T_{n}^*$(_l_Manifold("N"))`` (and its previous iterate) +* `primal_stepsize::Float64`: proximal parameter of the primal prox +* `dual_stepsize::Float64`: proximal parameter of the dual prox +* `reg_param::Float64`: regularisation parameter for the Newton matrix +* `stop::`[`StoppingCriterion`](@ref): a [`StoppingCriterion`](@ref) +* `update_primal_base`: function to update the primal base +* `update_dual_base`: function to update the dual base +* $_field_retr +* $_field_inv_retr +* $_field_vector_transp where for the update functions a [`AbstractManoptProblem`](@ref) `amp`, [`AbstractManoptSolverState`](@ref) `ams` and the current iterate `i` are the arguments. @@ -86,15 +88,27 @@ If you activate these to be different from the default identity, you have to pro `p.Λ` for the algorithm to work (which might be `missing`). # Constructor - PrimalDualSemismoothNewtonState(M::AbstractManifold, - m::P, n::Q, x::P, ξ::T, primal_stepsize::Float64, dual_stepsize::Float64, reg_param::Float64; - stopping_criterion::StoppingCriterion = StopAfterIteration(50), - update_primal_base::Union{Function,Missing} = missing, - update_dual_base::Union{Function,Missing} = missing, - retraction_method = default_retraction_method(M, typeof(p)), - inverse_retraction_method = default_inverse_retraction_method(M, typeof(p)), - vector_transport_method = default_vector_transport_method(M, typeof(p)), - ) + + PrimalDualSemismoothNewtonState(M::AbstractManifold, m::P, n::Q, x::P, ξ::T; kwargs...) + +Generate a state for the [`primal_dual_semismooth_Newton`](@ref). 
+ +## Keyword arguments + +* `primal_stepsize=1/sqrt(8)` +* `dual_stepsize=1/sqrt(8)` +* `reg_param=1e-5` +* `update_primal_base=(amp, ams, k) -> o.m` +* `update_dual_base=(amp, ams, k) -> o.n` +* $_kw_retraction_method_default: + $_kw_retraction_method +* $_kw_inverse_retraction_method_default: + $_kw_inverse_retraction_method +* `stopping_criterion=`[`StopAfterIteration`](@ref)(50): + $_kw_stopping_criterion +* $_kw_vector_transport_method_default: + $_kw_vector_transport_method + """ mutable struct PrimalDualSemismoothNewtonState{ P, diff --git a/src/plans/interior_point_Newton_plan.jl b/src/plans/interior_point_Newton_plan.jl index f965c2f97f..4b70f04aeb 100644 --- a/src/plans/interior_point_Newton_plan.jl +++ b/src/plans/interior_point_Newton_plan.jl @@ -26,7 +26,7 @@ get_gradient(s::StepsizeState) = s.X set_iterate!(s::StepsizeState, M, p) = copyto!(M, s.p, p) set_gradient!(s::StepsizeState, M, p, X) = copyto!(M, s.X, p, X) -@doc raw""" +@doc """ InteriorPointNewtonState <: AbstractHessianSolverState # Fields @@ -65,11 +65,11 @@ are used to fill in reasonable defaults for the keywords. 
# Input -* `M`: a Riemannian manifold +$(_arg_M) * `cmo`: a [`ConstrainedManifoldObjective`](@ref) -* `p`: a point on `M` as the inital point of the algorithm -* `sub_problem`: an [`AbstractManoptProblem`](@ref) problem for the sub solver -* `sub_state`: an [`AbstractManoptSolverState`](@ref) for the sub solver +$(_arg_p) +$(_arg_sub_problem) +$(_arg_sub_state) # Keyword arguments @@ -88,7 +88,7 @@ Let `m` and `n` denote the number of inequality and equality constraints, respec * `retraction_method=default_retraction_method(M, typeof(p))` * `step_objective=`[`ManifoldGradientObjective`](@ref)`(`[`KKTVectorFieldNormSq`](@ref)`(cmo)`, [`KKTVectorFieldNormSqGradient`](@ref)`(cmo)`; evaluation=[`InplaceEvaluation`](@ref)`())` * `vector_space=`[`Rn`](@ref Manopt.Rn): a function that, given an integer, returns the manifold to be used for the vector space components ``ℝ^m,ℝ^n`` -* `step_problem`: wrap the manifold ``\mathcal M × ℝ^m × ℝ^n × ℝ^m`` +* `step_problem`: wrap the manifold ``$(_l_M) × ℝ^m × ℝ^n × ℝ^m`` * `step_state`: the [`StepsizeState`](@ref) with point and search direction * `stepsize`: an [`ArmijoLinesearch`](@ref) with the [`InteriorPointCentralityCondition`](@ref) as additional condition to accept a step. Note that this step size operates on its own `step_problem`and `step_state` @@ -895,8 +895,8 @@ for the positive and negative part of ``v``, respectively # Fields * `ε`: a threshold +* `residual`: store the last residual if the stopping criterion is hit. * `at_iteration`: - """ mutable struct StopWhenKKTResidualLess{R} <: StoppingCriterion ε::R @@ -953,7 +953,7 @@ end # An internal function to compute the new σ @doc raw""" - calculate_σ(M, cmo, p, μ, λ, s) + calculate_σ(M, cmo, p, μ, λ, s; kwargs...) 
Compute the new ``σ`` factor for the barrier parameter in [`interior_point_Newton`](@ref) as @@ -966,7 +966,7 @@ where ``F`` is the KKT vector field, hence the [`KKTVectorFieldNormSq`](@ref) is * `vector_space=`[`Rn`](@ref Manopt.Rn) a function that, given an integer, returns the manifold to be used for the vector space components ``ℝ^m,ℝ^n`` * `N` the manifold ``\mathcal M × ℝ^m × ℝ^n × ℝ^m`` the vector field lives on (generated using `vector_space`) -* `q` provide memory on `N` for interims computations +* `q` provide memory on `N` for interims evaluation of the vector field """ function calculate_σ( N::AbstractManifold, cmo::AbstractDecoratedManifoldObjective, p, μ, λ, s; kwargs... diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index 34e56cf9ae..b49b26e065 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -1,5 +1,5 @@ -@doc raw""" +@doc """ NonlinearLeastSquaresObjective{T<:AbstractEvaluationType} <: AbstractManifoldObjective{T} A type for nonlinear least squares problems. @@ -8,7 +8,7 @@ A type for nonlinear least squares problems. Specify a nonlinear least squares problem # Fields -* `f` a function ``f: \mathcal M → ℝ^d`` to minimize +* `f` a function ``f: $(_l_M) → ℝ^d`` to minimize * `jacobian!!` Jacobian of the function ``f`` * `jacobian_tangent_basis` the basis of tangent space used for computing the Jacobian. * `num_components` number of values returned by `f` (equal to `d`). @@ -132,7 +132,7 @@ function get_gradient!( return X end -@doc raw""" +@doc """ LevenbergMarquardtState{P,T} <: AbstractGradientSolverState Describes a Gradient based descent algorithm, with @@ -141,31 +141,41 @@ Describes a Gradient based descent algorithm, with A default value is given in brackets if a parameter can be left out in initialization. 
-* `x`: a point (of type `P`) on a manifold as starting point -* `stop`: (`StopAfterIteration(200) | StopWhenGradientNormLess(1e-12) | StopWhenStepsizeLess(1e-12)`) - a [`StoppingCriterion`](@ref) -* `retraction_method`: (`default_retraction_method(M, typeof(p))`) the retraction to use, - defaults to the default set for your manifold. -* `residual_values` value of ``F`` calculated in the solver setup or the previous iteration -* `residual_values_temp` value of ``F`` for the current proposal point -* `jacF` the current Jacobian of ``F`` -* `gradient` the current gradient of ``F`` -* `step_vector` the tangent vector at `x` that is used to move to the next point -* `last_stepsize` length of `step_vector` -* `η` Scaling factor for the sufficient cost decrease threshold required +* $_field_iterate +* $_field_stop +* $_field_retr +* `residual_values`: value of ``F`` calculated in the solver setup or the previous iteration +* `residual_values_temp`: value of ``F`` for the current proposal point +* `jacF`: the current Jacobian of ``F`` +* `gradient`: the current gradient of ``F`` +* `step_vector`: the tangent vector at `x` that is used to move to the next point +* `last_stepsize`: length of `step_vector` +* `η`: Scaling factor for the sufficient cost decrease threshold required to accept new proposal points. Allowed range: `0 < η < 1`. -* `damping_term` current value of the damping term -* `damping_term_min` initial (and also minimal) value of the damping term -* `β` parameter by which the damping term is multiplied when the current +* `damping_term`: current value of the damping term +* `damping_term_min`: initial (and also minimal) value of the damping term +* `β`: parameter by which the damping term is multiplied when the current new point is rejected -* `expect_zero_residual`: (`false`) if true, the algorithm expects that the value of +* `expect_zero_residual`: if true, the algorithm expects that the value of the residual (objective) at minimum is equal to 0. 
# Constructor - LevenbergMarquardtState(M, initialX, initial_residual_values, initial_jacF; initial_vector), kwargs...) + LevenbergMarquardtState(M, p, initial_residual_values, initial_jacF; kwargs...) -Generate Levenberg-Marquardt options. +Generate the Levenberg-Marquardt solver state. + +# Keyword arguments + +The following fields are keyword arguments + +* `β=5.0` +* `damping_term_min=0.1` +* `η=0.2`, +* `expect_zero_residual=false` +* `initial_gradient=`$(_link_zero_vector()) +* $_kw_retraction_method_default +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(200)`$_sc_any[`StopWhenGradientNormLess`](@ref)`(1e-12)`$_sc_any[`StopWhenStepsizeLess`](@ref)`(1e-12)` # See also diff --git a/src/plans/plan.jl b/src/plans/plan.jl index e299d5e2d7..d6cf2cf8af 100644 --- a/src/plans/plan.jl +++ b/src/plans/plan.jl @@ -103,6 +103,7 @@ by `set_manopt_parameter!(:Mode, "")`. """ is_tutorial_mode() = (get_manopt_parameter(:Mode) == "Tutorial") +include("docstring_snippets.jl") include("objective.jl") include("problem.jl") include("solver_state.jl") diff --git a/src/plans/primal_dual_plan.jl b/src/plans/primal_dual_plan.jl index 4d1d6b604e..eec88ac43e 100644 --- a/src/plans/primal_dual_plan.jl +++ b/src/plans/primal_dual_plan.jl @@ -42,7 +42,7 @@ depending on the `evaluation=` keyword in the constructor and stored in `T <: Ab * `linearized_adjoint_operator!!`: the adjoint differential ``(DΛ)^* : \mathcal N → T\mathcal M`` * `prox_f!!`: the proximal map belonging to ``f`` * `prox_G_dual!!`: the proximal map belonging to ``g_n^*`` -* `Λ!!`: (`fordward_operator`) the forward operator (if given) ``Λ: \mathcal M → \mathcal N`` +* `Λ!!`: the forward operator (if given) ``Λ: \mathcal M → \mathcal N`` Either the linearized operator ``DΛ`` or ``Λ`` are required usually. @@ -695,12 +695,13 @@ The constructor accepts a printing function and some (shared) storage, which should at least record `:Iterate`, `:X` and `:n`. 
# Constructor -DebugDualResidual() +DebugDualResidual(; kwargs...) -with the keywords -* `io` (`stdout`) - stream to perform the debug to -* format (`"$prefix%s"`) format to print the dual residual, using the -* `prefix` (`"Dual Residual: "`) short form to just set the prefix +# Keyword arguments + +* `io=stdout`: stream to perform the debug to +* `format="$prefix%s"`: format to print the dual residual, using the +* `prefix="Dual Residual: "`: short form to just set the prefix * `storage` (a new [`StoreStateAction`](@ref)) to store values for the debug. """ mutable struct DebugDualResidual <: DebugAction @@ -729,12 +730,12 @@ mutable struct DebugDualResidual <: DebugAction end end function (d::DebugDualResidual)( - tmp::TwoManifoldProblem, apds::AbstractPrimalDualSolverState, i::Int + tmp::TwoManifoldProblem, apds::AbstractPrimalDualSolverState, k::Int ) M = get_manifold(tmp, 1) N = get_manifold(tmp, 2) apdmo = get_objective(tmp) - if all(has_storage.(Ref(d.storage), [:Iterate, :X, :n])) && i > 0 # all values stored + if all(has_storage.(Ref(d.storage), [:Iterate, :X, :n])) && k > 0 # all values stored #fetch p_old = get_storage(d.storage, :Iterate) X_old = get_storage(d.storage, :X) @@ -745,7 +746,7 @@ function (d::DebugDualResidual)( dual_residual(M, N, apdmo, apds, p_old, X_old, n_old), ) end - return d.storage(tmp, apds, i) + return d.storage(tmp, apds, k) end @doc raw""" DebugPrimalResidual <: DebugAction @@ -756,12 +757,13 @@ should at least record `:Iterate`, `:X` and `:n`. # Constructor - DebugPrimalResidual() + DebugPrimalResidual(; kwargs...) 
-with the keywords -* `io` (`stdout`) - stream to perform the debug to -* format (`"$prefix%s"`) format to print the dual residual, using the -* `prefix` (`"Primal Residual: "`) short form to just set the prefix +# Keyword arguments + +* `io=stdout`: stream to perform the debug to +* `format="$prefix%s"`: format to print the dual residual, using the +* `prefix="Primal Residual: "`: short form to just set the prefix * `storage` (a new [`StoreStateAction`](@ref)) to store values for the debug. """ mutable struct DebugPrimalResidual <: DebugAction @@ -788,12 +790,12 @@ mutable struct DebugPrimalResidual <: DebugAction end end function (d::DebugPrimalResidual)( - tmp::TwoManifoldProblem, apds::AbstractPrimalDualSolverState, i::Int + tmp::TwoManifoldProblem, apds::AbstractPrimalDualSolverState, k::Int ) M = get_manifold(tmp, 1) N = get_manifold(tmp, 2) apdmo = get_objective(tmp) - if all(has_storage.(Ref(d.storage), [:Iterate, :X, :n])) && i > 0 # all values stored + if all(has_storage.(Ref(d.storage), [:Iterate, :X, :n])) && k > 0 # all values stored #fetch p_old = get_storage(d.storage, :Iterate) X_old = get_storage(d.storage, :X) @@ -804,7 +806,7 @@ function (d::DebugPrimalResidual)( primal_residual(M, N, apdmo, apds, p_old, X_old, n_old), ) end - return d.storage(tmp, apds, i) + return d.storage(tmp, apds, k) end @doc raw""" DebugPrimalDualResidual <: DebugAction @@ -818,9 +820,12 @@ should at least record `:Iterate`, `:X` and `:n`. DebugPrimalDualResidual() with the keywords -* `io` (`stdout`) - stream to perform the debug to -* format (`"$prefix%s"`) format to print the dual residual, using the -* `prefix` (`"Primal Residual: "`) short form to just set the prefix + +# Keyword arguments + +* `io=stdout`: stream to perform the debug to +* `format="$prefix%s"`: format to print the dual residual, using the +* `prefix="PD Residual: "`: short form to just set the prefix * `storage` (a new [`StoreStateAction`](@ref)) to store values for the debug. 
""" mutable struct DebugPrimalDualResidual <: DebugAction @@ -847,12 +852,12 @@ mutable struct DebugPrimalDualResidual <: DebugAction end end function (d::DebugPrimalDualResidual)( - tmp::TwoManifoldProblem, apds::AbstractPrimalDualSolverState, i::Int + tmp::TwoManifoldProblem, apds::AbstractPrimalDualSolverState, k::Int ) M = get_manifold(tmp, 1) N = get_manifold(tmp, 2) apdmo = get_objective(tmp) - if all(has_storage.(Ref(d.storage), [:Iterate, :X, :n])) && i > 0 # all values stored + if all(has_storage.(Ref(d.storage), [:Iterate, :X, :n])) && k > 0 # all values stored #fetch p_old = get_storage(d.storage, :Iterate) X_old = get_storage(d.storage, :X) @@ -862,7 +867,7 @@ function (d::DebugPrimalDualResidual)( dual_residual(tmp, apds, p_old, X_old, n_old) Printf.format(d.io, Printf.Format(d.format), v / manifold_dimension(M)) end - return d.storage(tmp, apds, i) + return d.storage(tmp, apds, k) end # @@ -932,10 +937,10 @@ mutable struct DebugDualChange <: DebugAction end end function (d::DebugDualChange)( - tmp::TwoManifoldProblem, apds::AbstractPrimalDualSolverState, i::Int + tmp::TwoManifoldProblem, apds::AbstractPrimalDualSolverState, k::Int ) N = get_manifold(tmp, 2) - if all(has_storage.(Ref(d.storage), [:X, :n])) && i > 0 # all values stored + if all(has_storage.(Ref(d.storage), [:X, :n])) && k > 0 # all values stored #fetch X_old = get_storage(d.storage, :X) n_old = get_storage(d.storage, :n) @@ -948,7 +953,7 @@ function (d::DebugDualChange)( ) Printf.format(d.io, Printf.Format(d.format), v) end - return d.storage(tmp, apds, i) + return d.storage(tmp, apds, k) end """ diff --git a/src/plans/proximal_plan.jl b/src/plans/proximal_plan.jl index 8eabdaca34..9cce485c1c 100644 --- a/src/plans/proximal_plan.jl +++ b/src/plans/proximal_plan.jl @@ -9,14 +9,22 @@ specify a problem for solvers based on the evaluation of proximal maps. 
# Fields -* `cost` - a function ``F:\mathcal M→ℝ`` to + +* `cost`: a function ``F:\mathcal M→ℝ`` to minimize -* `proxes` - proximal maps ``\operatorname{prox}_{λ\varphi}:\mathcal M→\mathcal M`` +* `proxes`: proximal maps ``\operatorname{prox}_{λ\varphi}:\mathcal M→\mathcal M`` as functions `(M, λ, p) -> q`. -* `number_of_proxes` - (`ones(length(proxes))`` number of proximal maps per function, +* `number_of_proxes`: number of proximal maps per function, to specify when one of the maps is a combined one such that the proximal maps functions return more than one entry per function, you have to adapt this value. if not specified, it is set to one prox per function. + +# Constructor + + ManifoldProximalMapObjective(cost, proxes, number_of_proxes=ones(length(proxes)); + evaluation=AllocatingEvaluation()) + + # See also [`cyclic_proximal_point`](@ref), [`get_cost`](@ref), [`get_proximal_map`](@ref) @@ -37,11 +45,11 @@ mutable struct ManifoldProximalMapObjective{E<:AbstractEvaluationType,TC,TP,V} < ) end function ManifoldProximalMapObjective( - f, + f::F, proxes_f::Union{Tuple,AbstractVector}, nOP::Vector{<:Integer}; - evaluation::AbstractEvaluationType=AllocatingEvaluation(), - ) + evaluation::E=AllocatingEvaluation(), + ) where {E<:AbstractEvaluationType,F} return if length(nOP) != length(proxes_f) throw( ErrorException( @@ -49,7 +57,7 @@ mutable struct ManifoldProximalMapObjective{E<:AbstractEvaluationType,TC,TP,V} < ), ) else - new{typeof(evaluation),typeof(f),typeof(proxes_f),typeof(nOP)}(f, proxes_f, nOP) + new{E,F,typeof(proxes_f),typeof(nOP)}(f, proxes_f, nOP) end end end @@ -124,26 +132,28 @@ end # Proximal based State # # -@doc raw""" +@doc """ CyclicProximalPointState <: AbstractManoptSolverState stores options for the [`cyclic_proximal_point`](@ref) algorithm. 
These are the # Fields -* `p`: the current iterate -* `stopping_criterion`: a [`StoppingCriterion`](@ref) -* `λ`: (@(i) -> 1/i) a function for the values of ``λ_k`` per iteration(cycle ``ì`` -* `oder_type`: (`:LinearOrder`) whether to use a randomly permuted sequence (`:FixedRandomOrder`), + +* $_field_p +* $_field_stop +* `λ`: a function for the values of ``λ_k`` per iteration (cycle ``k``) +* `oder_type`: whether to use a randomly permuted sequence (`:FixedRandomOrder`), a per cycle permuted sequence (`:RandomOrder`) or the default linear one. # Constructor - CyclicProximalPointState(M, p) + + CyclicProximalPointState(M, p; kwargs...) Generate the options with the following keyword arguments -* `stopping_criterion`: (`StopAfterIteration(2000)`) a [`StoppingCriterion`](@ref). -* `λ`: ( `i -> 1.0 / i`) a function to compute the ``λ_k, k ∈ \mathbb N``, -* `evaluation_order`: (`:LinearOrder`) a Symbol indicating the order the proximal maps are applied. +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(2000)` +* `λ=i -> 1.0 / i` a function to compute the ``λ_k, k ∈ $(_l_Manifold("N"))``, +* `evaluation_order=:LinearOrder`: specify the `order_type` # See also @@ -197,9 +207,9 @@ mutable struct DebugProximalParameter <: DebugAction end end function (d::DebugProximalParameter)( - ::AbstractManoptProblem, cpps::CyclicProximalPointState, i::Int + ::AbstractManoptProblem, cpps::CyclicProximalPointState, k::Int ) - (i > 0) && Printf.format(d.io, Printf.Format(d.format), cpps.λ(i)) + (k > 0) && Printf.format(d.io, Printf.Format(d.format), cpps.λ(k)) return nothing end @@ -216,7 +226,7 @@ mutable struct RecordProximalParameter <: RecordAction RecordProximalParameter() = new(Array{Float64,1}()) end function (r::RecordProximalParameter)( - ::AbstractManoptProblem, cpps::CyclicProximalPointState, i::Int + ::AbstractManoptProblem, cpps::CyclicProximalPointState, k::Int ) - return record_or_reset!(r, cpps.λ(i), i) + return record_or_reset!(r, cpps.λ(k), k) end diff --git 
a/src/plans/quasi_newton_plan.jl b/src/plans/quasi_newton_plan.jl index 98588086d2..f0bd5eb5b4 100644 --- a/src/plans/quasi_newton_plan.jl +++ b/src/plans/quasi_newton_plan.jl @@ -290,39 +290,48 @@ mutable struct InverseBroyden <: AbstractQuasiNewtonUpdateRule end InverseBroyden(φ::Float64) = InverseBroyden(φ, :constant) -@doc raw""" +_doc_QN_H_update = raw"``H_k \mapsto H_{k+1}``" +_doc_QN_B_update = raw"``B_k \mapsto B_{k+1}``" +_doc_QN_H_full_system = raw""" +```math +\text{Solve} \quad \hat{η_k} = - H_k \widehat{\operatorname{grad}f(x_k)}, +``` +""" +_doc_QN_B_full_system = raw""" +```math +\hat{η_k} = - B_k \widehat{\operatorname{grad}f(x_k)}, +``` +""" + +@doc """ QuasiNewtonMatrixDirectionUpdate <: AbstractQuasiNewtonDirectionUpdate The `QuasiNewtonMatrixDirectionUpdate` represent a quasi-Newton update rule, where the operator is stored as a matrix. A distinction is made between the update of the -approximation of the Hessian, ``H_k \mapsto H_{k+1}``, and the update of the approximation -of the Hessian inverse, ``B_k \mapsto B_{k+1}``. +approximation of the Hessian, $_doc_QN_H_update, and the update of the approximation +of the Hessian inverse, $_doc_QN_B_update. For the first case, the coordinates of the search direction ``η_k`` with respect to -a basis ``\{b_i\}^{n}_{i=1}`` are determined by solving a linear system of equations +a basis ``$(_math_sequence("b", "i", "1", "n"))`` are determined by solving a linear system of equations -```math -\text{Solve} \quad \hat{η_k} = - H_k \widehat{\operatorname{grad}f(x_k)}, -``` +$_doc_QN_H_full_system -where ``H_k`` is the matrix representing the operator with respect to the basis ``\{b_i\}^{n}_{i=1}`` -and ``\widehat{\operatorname{grad}f(x_k)}`` represents the coordinates of the gradient of -the objective function ``f`` in ``x_k`` with respect to the basis ``\{b_i\}^{n}_{i=1}``. 
+where ``H_k`` is the matrix representing the operator with respect to the basis ``$(_math_sequence("b", "i", "1", "n"))`` +and ``\\widehat{$_l_grad f(p_k)}`` represents the coordinates of the gradient of +the objective function ``f`` in ``x_k`` with respect to the basis ``$(_math_sequence("b", "i", "1", "n"))``. If a method is chosen where Hessian inverse is approximated, the coordinates of the search -direction ``η_k`` with respect to a basis ``\{b_i\}^{n}_{i=1}`` are obtained simply by +direction ``η_k`` with respect to a basis ``$(_math_sequence("b", "i", "1", "n"))`` are obtained simply by matrix-vector multiplication -```math -\hat{η_k} = - B_k \widehat{\operatorname{grad}f(x_k)}, -``` +$_doc_QN_B_full_system -where ``B_k`` is the matrix representing the operator with respect to the basis ``\{b_i\}^{n}_{i=1}`` -and ``\widehat{\operatorname{grad}f(x_k)}``. In the end, the search direction ``η_k`` is -generated from the coordinates ``\hat{eta_k}`` and the vectors of the basis ``\{b_i\}^{n}_{i=1}`` +where ``B_k`` is the matrix representing the operator with respect to the basis ``$(_math_sequence("b", "i", "1", "n"))`` +and ``\\widehat{$_l_grad f(p_k)}``. In the end, the search direction ``η_k`` is +generated from the coordinates ``\\hat{η_k}`` and the vectors of the basis ``$(_math_sequence("b", "i", "1", "n"))`` in both variants. The [`AbstractQuasiNewtonUpdateRule`](@ref) indicates which quasi-Newton update rule is used. In all of them, the Euclidean update formula is used to generate the matrix ``H_{k+1}`` -and ``B_{k+1}``, and the basis ``\{b_i\}^{n}_{i=1}`` is transported into the upcoming tangent -space ``T_{x_{k+1}} \mathcal{M}``, preferably with an isometric vector transport, or generated there. +and ``B_{k+1}``, and the basis ``$(_math_sequence("b", "i", "1", "n"))`` is transported into the upcoming tangent +space ``T_{p_{k+1}} $_l_M``, preferably with an isometric vector transport, or generated there. 
# Provided functors @@ -332,13 +341,13 @@ space ``T_{x_{k+1}} \mathcal{M}``, preferably with an isometric vector transport # Fields * `basis`: an `AbstractBasis` to use in the tangent spaces -* `matrix`: (`Matrix{Float64}(I, manifold_dimension(M), manifold_dimension(M))`) - the matrix which represents the approximating operator. +* `matrix`: the matrix which represents the approximating operator. * `scale`: (`true) indicates whether the initial matrix (= identity matrix) should be scaled before the first update. * `update`: a [`AbstractQuasiNewtonUpdateRule`](@ref). -* `vector_transport_method`: (`vector_transport_method`)an `AbstractVectorTransportMethod` +* $_field_vector_transp # Constructor + QuasiNewtonMatrixDirectionUpdate( M::AbstractManifold, update, @@ -349,15 +358,16 @@ space ``T_{x_{k+1}} \mathcal{M}``, preferably with an isometric vector transport ## Keyword arguments -* `scale`, `vector_transport_method` for the two fields +* `scale=true` +* $_kw_vector_transport_method_default Generate the Update rule with defaults from a manifold and the names corresponding to the fields. # See also -[`QuasiNewtonLimitedMemoryDirectionUpdate`](@ref) -[`QuasiNewtonCautiousDirectionUpdate`](@ref) -[`AbstractQuasiNewtonDirectionUpdate`](@ref) +[`QuasiNewtonLimitedMemoryDirectionUpdate`](@ref), +[`QuasiNewtonCautiousDirectionUpdate`](@ref), +[`AbstractQuasiNewtonDirectionUpdate`](@ref), """ mutable struct QuasiNewtonMatrixDirectionUpdate{ NT<:AbstractQuasiNewtonUpdateRule, diff --git a/src/plans/record.jl b/src/plans/record.jl index c961e13dad..d639f9313d 100644 --- a/src/plans/record.jl +++ b/src/plans/record.jl @@ -4,9 +4,9 @@ A `RecordAction` is a small functor to record values. 
The usual call is given by - (amp::AbstractManoptProblem, ams::AbstractManoptSolverState, i) -> s + (amp::AbstractManoptProblem, ams::AbstractManoptSolverState, k) -> s -that performs the record for the current problem and solver combination, and where `i` is +that performs the record for the current problem and solver combination, and where `k` is the current iteration. By convention `i=0` is interpreted as "For Initialization only," so only @@ -178,7 +178,7 @@ end return the recorded values stored within a [`RecordAction`](@ref) `r`. """ get_record(r::RecordAction) = r.recorded_values -get_record(r::RecordAction, i) = r.recorded_values +get_record(r::RecordAction, k) = r.recorded_values """ get_index(rs::RecordSolverState, s::Symbol) @@ -195,16 +195,16 @@ getindex(rs::RecordSolverState, s::Symbol) = get_record(rs, s) getindex(rs::RecordSolverState, s::Symbol, i...) = get_record_action(rs, s)[i...] """ - record_or_reset!(r,v,i) + record_or_reset!(r, v, k) -either record (`i>0` and not `Inf`) the value `v` within the [`RecordAction`](@ref) `r` -or reset (`i<0`) the internal storage, where `v` has to match the internal +either record (`k>0` and not `Inf`) the value `v` within the [`RecordAction`](@ref) `r` +or reset (`k<0`) the internal storage, where `v` has to match the internal value type of the corresponding [`RecordAction`](@ref). """ -function record_or_reset!(r::RecordAction, v, i::Int) - if i > 0 +function record_or_reset!(r::RecordAction, v, k::Int) + if k > 0 push!(r.recorded_values, deepcopy(v)) - elseif i < 0 # reset if negative + elseif k < 0 # reset if negative r.recorded_values = empty(r.recorded_values) # Reset to empty end end @@ -216,7 +216,7 @@ end @doc raw""" RecordEvery <: RecordAction -record only every $i$th iteration. +record only every ``k``th iteration. Otherwise (optionally, but activated by default) just update internal tracking values. 
@@ -231,12 +231,12 @@ mutable struct RecordEvery <: RecordAction end end function (re::RecordEvery)( - amp::AbstractManoptProblem, ams::AbstractManoptSolverState, i::Int + amp::AbstractManoptProblem, ams::AbstractManoptSolverState, k::Int ) - if i <= 0 - re.record(amp, ams, i) - elseif (rem(i, re.every) == 0) - re.record(amp, ams, i) + if k <= 0 + re.record(amp, ams, k) + elseif (rem(k, re.every) == 0) + re.record(amp, ams, k) elseif re.always_update re.record(amp, ams, 0) end @@ -244,7 +244,7 @@ function (re::RecordEvery)( # note that since recording is happening at the end # sets activity for the _next_ iteration set_manopt_parameter!( - ams, :SubState, :Record, :Activity, !(i < 1) && (rem(i + 1, re.every) == 0) + ams, :SubState, :Record, :Activity, !(k < 1) && (rem(k + 1, re.every) == 0) ) return nothing end @@ -261,8 +261,8 @@ function status_summary(re::RecordEvery) return "[$s, $(re.every)]" end get_record(r::RecordEvery) = get_record(r.record) -get_record(r::RecordEvery, i) = get_record(r.record, i) -getindex(r::RecordEvery, i) = get_record(r, i) +get_record(r::RecordEvery, k) = get_record(r.record, k) +getindex(r::RecordEvery, k) = get_record(r, k) """ RecordGroup <: RecordAction @@ -330,9 +330,9 @@ mutable struct RecordGroup <: RecordAction end RecordGroup() = new(Array{RecordAction,1}(), Dict{Symbol,Int}()) end -function (d::RecordGroup)(p::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int) +function (d::RecordGroup)(p::AbstractManoptProblem, s::AbstractManoptSolverState, k::Int) for ri in d.group - ri(p, s, i) + ri(p, s, k) end end function status_summary(rg::RecordGroup) @@ -348,7 +348,7 @@ end return an array of tuples, where each tuple is a recorded set per iteration or record call. 
- get_record(r::RecordGruop, i::Int) + get_record(r::RecordGruop, k::Int) return an array of values corresponding to the `i`th entry in this record group @@ -409,9 +409,9 @@ function RecordSubsolver(; return RecordSubsolver{record_type}(record_type[], r) end function (rsr::RecordSubsolver)( - ::AbstractManoptProblem, ams::AbstractManoptSolverState, i::Int + ::AbstractManoptProblem, ams::AbstractManoptSolverState, k::Int ) - record_or_reset!(rsr, get_record(get_sub_state(ams), rsr.record...), i) + record_or_reset!(rsr, get_record(get_sub_state(ams), rsr.record...), k) return nothing end function show(io::IO, rsr::RecordSubsolver{R}) where {R} @@ -449,12 +449,12 @@ mutable struct RecordWhenActive{R<:RecordAction} <: RecordAction end function (rwa::RecordWhenActive)( - amp::AbstractManoptProblem, ams::AbstractManoptSolverState, i::Int + amp::AbstractManoptProblem, ams::AbstractManoptSolverState, k::Int ) if rwa.active - rwa.record(amp, ams, i) - elseif (rwa.always_update) && (i <= 0) - rwa.record(amp, ams, i) + rwa.record(amp, ams, k) + elseif (rwa.always_update) && (k <= 0) + rwa.record(amp, ams, k) end end function show(io::IO, rwa::RecordWhenActive) @@ -493,8 +493,8 @@ mutable struct RecordCost <: RecordAction recorded_values::Array{Float64,1} RecordCost() = new(Array{Float64,1}()) end -function (r::RecordCost)(amp::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int) - return record_or_reset!(r, get_cost(amp, get_iterate(s)), i) +function (r::RecordCost)(amp::AbstractManoptProblem, s::AbstractManoptSolverState, k::Int) + return record_or_reset!(r, get_cost(amp, get_iterate(s)), k) end show(io::IO, ::RecordCost) = print(io, "RecordCost()") status_summary(di::RecordCost) = ":Cost" @@ -561,7 +561,7 @@ mutable struct RecordChange{ return new{IRT,typeof(a)}(Vector{Float64}(), a, inverse_retraction_method) end end -function (r::RecordChange)(amp::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int) +function (r::RecordChange)(amp::AbstractManoptProblem, 
s::AbstractManoptSolverState, k::Int) M = get_manifold(amp) record_or_reset!( r, @@ -575,9 +575,9 @@ function (r::RecordChange)(amp::AbstractManoptProblem, s::AbstractManoptSolverSt else 0.0 end, - i, + k, ) - r.storage(amp, s, i) + r.storage(amp, s, k) return r.recorded_values end function show(io::IO, rc::RecordChange) @@ -651,7 +651,7 @@ mutable struct RecordEntryChange{TStorage<:StoreStateAction} <: RecordAction end end function (r::RecordEntryChange)( - amp::AbstractManoptProblem, ams::AbstractManoptSolverState, i::Int + amp::AbstractManoptProblem, ams::AbstractManoptSolverState, k::Int ) value = 0.0 if has_storage(r.storage, r.field) @@ -659,8 +659,8 @@ function (r::RecordEntryChange)( amp, ams, getfield(ams, r.field), get_storage(r.storage, r.field) ) end - r.storage(amp, ams, i) - return record_or_reset!(r, value, i) + r.storage(amp, ams, k) + return record_or_reset!(r, value, k) end function show(io::IO, rec::RecordEntryChange) return print(io, "RecordEntryChange(:$(rec.field), $(rec.distance))") @@ -709,8 +709,8 @@ mutable struct RecordIteration <: RecordAction recorded_values::Array{Int,1} RecordIteration() = new(Array{Int,1}()) end -function (r::RecordIteration)(::AbstractManoptProblem, ::AbstractManoptSolverState, i::Int) - return record_or_reset!(r, i, i) +function (r::RecordIteration)(::AbstractManoptProblem, ::AbstractManoptSolverState, k::Int) + return record_or_reset!(r, k, k) end show(io::IO, ::RecordIteration) = print(io, "RecordIteration()") status_summary(::RecordIteration) = ":Iteration" @@ -725,10 +725,10 @@ mutable struct RecordStoppingReason <: RecordAction end RecordStoppingReason() = RecordStoppingReason(String[]) function (rsr::RecordStoppingReason)( - ::AbstractManoptProblem, ams::AbstractManoptSolverState, i::Int + ::AbstractManoptProblem, ams::AbstractManoptSolverState, k::Int ) s = get_reason(get_stopping_criterion(ams)) - return (length(s) > 0) && record_or_reset!(rsr, s, i) + return (length(s) > 0) && record_or_reset!(rsr, s, k) 
end show(io::IO, ::RecordStoppingReason) = print(io, "RecordStoppingReason()") status_summary(di::RecordStoppingReason) = ":Stop" @@ -757,16 +757,16 @@ mutable struct RecordTime <: RecordAction return new(Array{Nanosecond,1}(), Nanosecond(time_ns()), mode) end end -function (r::RecordTime)(p::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int) +function (r::RecordTime)(p::AbstractManoptProblem, s::AbstractManoptSolverState, k::Int) # At iteration zero also reset start - (i == 0) && (r.start = Nanosecond(time_ns())) + (k == 0) && (r.start = Nanosecond(time_ns())) t = Nanosecond(time_ns()) - r.start (r.mode == :iterative) && (r.start = Nanosecond(time_ns())) if r.mode == :total # only record at end (if `stop_solver` returns true) - return record_or_reset!(r, t, (i > 0 && stop_solver!(p, s, i)) ? i : 0) + return record_or_reset!(r, t, (k > 0 && stop_solver!(p, s, k)) ? k : 0) else - return record_or_reset!(r, t, i) + return record_or_reset!(r, t, k) end end function show(io::IO, ri::RecordTime) diff --git a/src/plans/solver_state.jl b/src/plans/solver_state.jl index c6b489e395..77b56dc8b6 100644 --- a/src/plans/solver_state.jl +++ b/src/plans/solver_state.jl @@ -337,8 +337,8 @@ internal storage for [`AbstractStateAction`](@ref)s to store a tuple of fields f [`AbstractManoptSolverState`](@ref)s This functor possesses the usual interface of functions called during an -iteration and acts on `(p, s, i)`, where `p` is a [`AbstractManoptProblem`](@ref), -`s` is an [`AbstractManoptSolverState`](@ref) and `i` is the current iteration. +iteration and acts on `(p, s, k)`, where `p` is a [`AbstractManoptProblem`](@ref), +`s` is an [`AbstractManoptSolverState`](@ref) and `k` is the current iteration. 
# Fields @@ -478,10 +478,10 @@ function _store_vector_assert_type( end function (a::StoreStateAction)( - amp::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int + amp::AbstractManoptProblem, s::AbstractManoptSolverState, k::Int ) - (!a.once || a.last_stored != i) && (update_storage!(a, amp, s)) - a.last_stored = i + (!a.once || a.last_stored != k) && (update_storage!(a, amp, s)) + a.last_stored = k return a end diff --git a/src/plans/stepsize.jl b/src/plans/stepsize.jl index 476c4d26e8..5485cdd4d9 100644 --- a/src/plans/stepsize.jl +++ b/src/plans/stepsize.jl @@ -99,12 +99,12 @@ A functor that represents several decreasing step sizes # Fields -* `exponent`: (`1`) a value ``e`` the current iteration numbers ``e``th exponential is +* `exponent`: a value ``e`` the current iteration numbers ``e``th exponential is taken of -* `factor`: (`1`) a value ``f`` to multiply the initial step size with every iteration -* `length`: (`1`) the initial step size ``l``. -* `subtrahend`: (`0`) a value ``a`` that is subtracted every iteration -* `shift`: (`0`) shift the denominator iterator ``i`` by ``s```. +* `factor`: a value ``f`` to multiply the initial step size with every iteration +* `length`: the initial step size ``l``. +* `subtrahend`: a value ``a`` that is subtracted every iteration +* `shift`: shift the denominator iterator ``i`` by ``s```. * `type`: a symbol that indicates whether the stepsize is relatively (:relative), with respect to the gradient norm, or absolutely (:absolute) constant. @@ -122,10 +122,13 @@ and hence the default simplifies to just ``s_i = \frac{l}{i}`` Alternatively one can also use the following keyword. 
- DecreasingStepsize( - M::AbstractManifold=DefaultManifold(3); - length=injectivity_radius(M)/2, multiplier=1.0, subtrahend=0.0, - exponent=1.0, shift=0, type=:relative + DecreasingStepsize(M::AbstractManifold=DefaultManifold(3); + length=injectivity_radius(M)/2, + multiplier=1.0, + subtrahend=0.0, + exponent=1.0, + shift=0, + type=:relative, ) initializes all fields, where none of them is mandatory and the length is set to @@ -156,9 +159,9 @@ function DecreasingStepsize( return DecreasingStepsize(length, factor, subtrahend, exponent, shift, type) end function (s::DecreasingStepsize)( - amp::P, ams::O, i::Int, args...; kwargs... + amp::P, ams::O, k::Int, args...; kwargs... ) where {P<:AbstractManoptProblem,O<:AbstractManoptSolverState} - ds = (s.length - i * s.subtrahend) * (s.factor^i) / ((i + s.shift)^(s.exponent)) + ds = (s.length - k * s.subtrahend) * (s.factor^k) / ((k + s.shift)^(s.exponent)) if s.type == :absolute ns = norm(get_manifold(amp), get_iterate(ams), get_gradient(ams)) if ns > eps(eltype(ds)) @@ -199,19 +202,20 @@ function armijo_initial_guess( return ifelse(isfinite(max_step), min(l, max_step / grad_norm), l) end -@doc raw""" +@doc """ ArmijoLinesearch <: Linesearch A functor representing Armijo line search including the last runs state string the last stepsize. 
# Fields -* `initial_stepsize`: (`1.0`) and initial step size -* `retraction_method`: (`default_retraction_method(M)`) the retraction to use -* `contraction_factor`: (`0.95`) exponent for line search reduction -* `sufficient_decrease`: (`0.1`) gain within Armijo's rule -* `last_stepsize`: (`initialstepsize`) the last step size to start the search with -* `initial_guess`: (`(p,s,i,l) -> l`) based on a [`AbstractManoptProblem`](@ref) `p`, +* `candidate_point`: to store an interim result +* `initial_stepsize`: and initial step size +* `retraction_method`: the retraction to use +* `contraction_factor`: exponent for line search reduction +* `sufficient_decrease`: gain within Armijo's rule +* `last_stepsize`: the last step size to start the search with +* `initial_guess`: based on a [`AbstractManoptProblem`](@ref) `p`, [`AbstractManoptSolverState`](@ref) `s` and a current iterate `i` and a last step size `l`, this returns an initial guess. The default uses the last obtained stepsize * `additional_decrease_condition`: (`(M,p) -> true`) specify a condition a new point has to additionally @@ -219,16 +223,12 @@ A functor representing Armijo line search including the last runs state string t * `additional_increase_condition`: (`(M,p) -> true`) specify a condtion that additionally to checking a valid increase has to be fulfilled. The default accepts all points. -as well as for internal use - -* `candidate_point`: (`allocate_result(M, rand)`) to store an interim result - Furthermore the following fields act as safeguards -* `stop_when_stepsize_less`: (`0.0`) smallest stepsize when to stop (the last one before is taken) -* `stop_when_stepsize_exceeds`: ([`max_stepsize`](@ref)`(M, p)`) largest stepsize when to stop. 
-* `stop_increasing_at_step`: (`100`) last step to increase the stepsize (phase 1), -* `stop_decreasing_at_step`: (`1000`) last step size to decrease the stepsize (phase 2), +* `stop_when_stepsize_less`: smallest stepsize when to stop (the last one before is taken) +* `stop_when_stepsize_exceeds`: largest stepsize when to stop. +* `stop_increasing_at_step`: last step to increase the stepsize (phase 1), +* `stop_decreasing_at_step`: last step size to decrease the stepsize (phase 2), Pass `:Messages` to a `debug=` to see `@info`s when these happen. @@ -240,16 +240,26 @@ with the fields keyword arguments and the retraction is set to the default retra The constructors return the functor to perform Armijo line search, where - (a::ArmijoLinesearch)(amp::AbstractManoptProblem, ams::AbstractManoptSolverState, i) + (a::ArmijoLinesearch)(amp::AbstractManoptProblem, ams::AbstractManoptSolverState, k) of a [`AbstractManoptProblem`](@ref) `amp`, [`AbstractManoptSolverState`](@ref) `ams` and a current iterate `i` with keywords. ## Keyword arguments - * `candidate_point`: (`allocate_result(M, rand)`) to pass memory for the candidate point - * `η`: (`-get_gradient(mp, get_iterate(s));`) the search direction to use, - by default the steepest descent direction. 
+* `candidate_point=`(`allocate_result(M, rand)`) +* `η=-`[`get_gradient`](@ref)`(mp, get_iterate(s))` +* `initial_stepsize=1.0` +* $_kw_retraction_method_default: + $_kw_retraction_method +* `contraction_factor=0.95` +* `sufficient_decrease=0.1` +* `last_stepsize=initialstepsize` +* `initial_guess=(p,s,i,l) -> l` +* `stop_when_stepsize_less=0.0` +* `stop_when_stepsize_exceeds` +* `stop_increasing_at_step=100` +* `stop_decreasing_at_step=1000` """ mutable struct ArmijoLinesearch{TRM<:AbstractRetractionMethod,P,I,F,IGF,DF,IF} <: Linesearch candidate_point::P @@ -302,13 +312,13 @@ end function (a::ArmijoLinesearch)( mp::AbstractManoptProblem, s::AbstractManoptSolverState, - i::Int, + k::Int, η=-get_gradient(mp, get_iterate(s)); kwargs..., ) p = get_iterate(s) X = get_gradient!(mp, get_gradient(s), p) - return a(mp, p, X, η; initial_guess=a.initial_guess(mp, s, i, a.last_stepsize)) + return a(mp, p, X, η; initial_guess=a.initial_guess(mp, s, k, a.last_stepsize)) end function (a::ArmijoLinesearch)( mp::AbstractManoptProblem, p, X, η; initial_guess=1.0, kwargs... @@ -365,26 +375,9 @@ function set_manopt_parameter!(a::ArmijoLinesearch, ::Val{:IncreaseCondition}, a return a end -@doc raw""" - (s, msg) = linesearch_backtrack( - M, - F, - p, - X, - s, - decrease, - contract, - η = -X, - f0 = f(p); - retraction_method=default_retraction_method(M), - stop_when_stepsize_less=0.0, - stop_when_stepsize_exceeds=max_stepsize(M, p) / norm(M, p, η), - stop_increasing_at_step=100, - stop_decreasing_at_step=1000 - additional_increase_condition = (M,p) -> true, - additional_decrease_condition = (M,p) -> true, - ) - (s, msg) = linesearch_backtrack!(M, q, F, p, X, s, decrease, contract, η = -X, f0 = f(p)) +@doc """ + (s, msg) = linesearch_backtrack(M, F, p, X, s, decrease, contract η = -X, f0 = f(p); kwargs...) + (s, msg) = linesearch_backtrack!(M, q, F, p, X, s, decrease, contract η = -X, f0 = f(p); kwargs...) 
perform a line search

@@ -395,21 +388,20 @@ perform a line search
* an initial stepsize `s`
* a sufficient `decrease`
* a `contract`ion factor ``σ``
-* a `retr`action, which defaults to the `default_retraction_method(M)`
* a search direction ``η = -X``
* an offset, ``f_0 = F(x)``

-the method can also be performed in-place of `q`, that is the resulting best point one reaches
-with the step size `s` as second argument.
+## Keyword arguments

-## Keywords
+* $_kw_retraction_method_default
+  $_kw_retraction_method
+* `stop_when_stepsize_less=0.0`: to avoid numerical underflow
+* `stop_when_stepsize_exceeds=`[`max_stepsize`](@ref)`(M, p) / norm(M, p, η)`: to avoid leaving the injectivity radius on a manifold
+* `stop_increasing_at_step=100`: stop the initial increase of step size after these many steps
+* `stop_decreasing_at_step=1000`: stop the decreasing search after these many steps
+* `additional_increase_condition=(M,p) -> true`: impose an additional condition for an increased step size to be accepted
+* `additional_decrease_condition=(M,p) -> true`: impose an additional condition for a decreased step size to be accepted

-* `retraction_method`: the retraction to use.
-* `stop_when_stepsize_less`: stop a bit early to avoid numerical underflow
-* `stop_when_stepsize_exceeds`: stop at a max step size to avoid leaving the injectivity radius on a manifold
-* `stop_increasing_at_step`: stop the initial increase of step size after these many steps
-* `stop_decreasing_at_step`: stop the decreasing search after these many steps
-* `increase_condition`: These keywords are used as safeguards, where only the max stepsize is a very manifold specific one.
# Return value @@ -440,7 +432,7 @@ function linesearch_backtrack!( contract, η::T=-X, f0=f(M, p); - retraction_method::AbstractRetractionMethod=default_retraction_method(M), + retraction_method::AbstractRetractionMethod=default_retraction_method(M, typeof(p)), additional_increase_condition=(M, p) -> true, additional_decrease_condition=(M, p) -> true, stop_when_stepsize_less=0.0, @@ -498,104 +490,141 @@ function linesearch_backtrack!( return (s, msg) end -@doc raw""" - NonmonotoneLinesearch <: Linesearch - -A functor representing a nonmonotone line search using the Barzilai-Borwein step size [IannazzoPorcelli:2017](@cite). -Together with a gradient descent algorithm this line search represents the Riemannian Barzilai-Borwein with nonmonotone line-search (RBBNMLS) algorithm. -The order is shifted in comparison of the algorithm steps from the paper -by Iannazzo and Porcelli so that in each iteration this line search first finds - +_doc_NM_linesearch = raw""" ```math y_{k} = \operatorname{grad}F(x_{k}) - \operatorname{T}_{x_{k-1} → x_k}(\operatorname{grad}F(x_{k-1})) ``` +""" -and - +_doc_NM_linesearch2 = raw""" ```math s_{k} = - α_{k-1} * \operatorname{T}_{x_{k-1} → x_k}(\operatorname{grad}F(x_{k-1})), ``` +""" -where ``α_{k-1}`` is the step size computed in the last iteration and ``\operatorname{T}`` is a vector transport. 
-Then the Barzilai—Borwein step size is
-
+_doc_NM_BB = raw"""
 ```math
 α_k^{\text{BB}} = \begin{cases}
 \min(α_{\text{max}}, \max(α_{\text{min}}, τ_{k})), & \text{if } ⟨s_{k}, y_{k}⟩_{x_k} > 0,\\
 α_{\text{max}}, & \text{else,}
 \end{cases}
 ```
+"""

-where
-
+_doc_NM_BB_direct = raw"""
 ```math
 τ_{k} = \frac{⟨s_{k}, s_{k}⟩_{x_k}}{⟨s_{k}, y_{k}⟩_{x_k}},
 ```
+"""

-if the direct strategy is chosen,
-
+_doc_NM_BB_indirect = raw"""
 ```math
-τ_{k} = \frac{⟨s_{k}, y_{k}⟩_{x_k}}{⟨y_{k}, y_{k}⟩_{x_k}},
+τ_{k} = \frac{⟨s_{k}, y_{k}⟩_{x_k}}{⟨y_{k}, y_{k}⟩_{x_k}},
 ```
+"""

-in case of the inverse strategy and an alternation between the two in case of the
-alternating strategy. Then find the smallest ``h = 0, 1, 2, …`` such that
-
+_doc_NM_BB_h = raw"""
 ```math
 F(\operatorname{retr}_{x_k}(- σ^h α_k^{\text{BB}} \operatorname{grad}F(x_k)))
 \leq
 \max_{1 ≤ j ≤ \min(k+1,m)} F(x_{k+1-j}) - γ σ^h α_k^{\text{BB}} ⟨\operatorname{grad}F(x_k), \operatorname{grad}F(x_k)⟩_{x_k},
 ```
+"""
+
+_doc_NM_final = raw"""
+```math
+α_k = σ^h α_k^{\text{BB}}.
+```
+"""
+
+@doc """
+    NonmonotoneLinesearch <: Linesearch
+
+A functor representing a nonmonotone line search using the Barzilai-Borwein step size [IannazzoPorcelli:2017](@cite).
+Together with a gradient descent algorithm this line search represents the Riemannian Barzilai-Borwein with nonmonotone line-search (RBBNMLS) algorithm.
+The order is shifted in comparison of the algorithm steps from the paper
+by Iannazzo and Porcelli so that in each iteration this line search first finds
+
+$_doc_NM_linesearch
+
+and
+
+$_doc_NM_linesearch2
+
+where ``α_{k-1}`` is the step size computed in the last iteration and $_l_vt is a vector transport.
+Then the Barzilai—Borwein step size is
+
+$_doc_NM_BB
+
+where
+
+$_doc_NM_BB_direct
+
+if the direct strategy is chosen,
+
+$_doc_NM_BB_indirect
+
+in case of the inverse strategy and an alternation between the two in case of the
+alternating strategy. 
Then find the smallest ``h = 0, 1, 2, …`` such that + +$_doc_NM_BB_h where ``σ`` is a step length reduction factor ``∈ (0,1)``, ``m`` is the number of iterations after which the function value has to be lower than the current one and ``γ`` is the sufficient decrease parameter ``∈(0,1)``. + Then find the new stepsize by -```math -α_k = σ^h α_k^{\text{BB}}. -``` +$_doc_NM_final # Fields -* `initial_stepsize`: (`1.0`) the step size to start the search with -* `memory_size`: (`10`) number of iterations after which the cost value needs to be lower than the current one -* `bb_min_stepsize`: (`1e-3`) lower bound for the Barzilai-Borwein step size greater than zero -* `bb_max_stepsize`: (`1e3`) upper bound for the Barzilai-Borwein step size greater than min_stepsize -* `retraction_method`: (`ExponentialRetraction()`) the retraction to use -* `strategy`: (`direct`) defines if the new step size is computed using the direct, indirect or alternating strategy -* `storage`: (for `:Iterate` and `:Gradient`) a [`StoreStateAction`](@ref) -* `stepsize_reduction`: (`0.5`) step size reduction factor contained in the interval (0,1) -* `sufficient_decrease`: (`1e-4`) sufficient decrease parameter contained in the interval (0,1) -* `vector_transport_method`: (`ParallelTransport()`) the vector transport method to use - -as well as for internal use - -* `candidate_point`: (`allocate_result(M, rand)`) to store an interim result +* `initial_stepsize=1.0`: the step size to start the search with +* `memory_size=10`: number of iterations after which the cost value needs to be lower than the current one +* `bb_min_stepsize=1e-3`: lower bound for the Barzilai-Borwein step size greater than zero +* `bb_max_stepsize=1e3`: upper bound for the Barzilai-Borwein step size greater than min_stepsize +* `retraction_method`: the retraction to use +* `strategy=direct`: defines if the new step size is computed using the direct, indirect or alternating strategy +* `storage`: (for `:Iterate` and `:Gradient`) a 
[`StoreStateAction`](@ref)
+* `stepsize_reduction=0.5`: step size reduction factor contained in the interval (0,1)
+* `sufficient_decrease=1e-4`: sufficient decrease parameter contained in the interval (0,1)
+* `vector_transport_method`: the vector transport method to use
+* `candidate_point`: to store an interim result

 Furthermore the following fields act as safeguards

-* `stop_when_stepsize_less: (`0.0`) smallest stepsize when to stop (the last one before is taken)
-* `stop_when_stepsize_exceeds`: ([`max_stepsize`](@ref)`(M, p)`) largest stepsize when to stop.
-* `stop_increasing_at_step`: (^100`) last step to increase the stepsize (phase 1),
-* `stop_decreasing_at_step`: (`1000`) last step size to decrease the stepsize (phase 2),
+* `stop_when_stepsize_less`: smallest stepsize when to stop (the last one before is taken)
+* `stop_when_stepsize_exceeds`: largest stepsize when to stop.
+* `stop_increasing_at_step`: last step to increase the stepsize (phase 1),
+* `stop_decreasing_at_step`: last step size to decrease the stepsize (phase 2),

 Pass `:Messages` to a `debug=` to see `@info`s when these happen.

 # Constructor

-    NonmonotoneLinesearch()
+    NonmonotoneLinesearch(M=DefaultManifold(); kwargs...)

-with the fields their order as optional arguments (deprecated).
-THis is deprecated, since both defaults and the memory allocation for the candidate do
-not take into account which manifold the line search operates on.
+generate the nonmonotone line search

-    NonmonotoneLinesearch(M)
-
-with the fields as keyword arguments and where the retraction
-and vector transport are set to the default ones on `M`, respectively.
+## Keyword arguments

-The constructors return the functor to perform nonmonotone line search. 
+* `candidate_point=allocate_result(M, rand)`: to store an interim result +* `initial_stepsize=1.0` +* `memory_size=10` +* `bb_min_stepsize=1e-3` +* `bb_max_stepsize=1e3` +* $_kw_retraction_method_default +* `strategy=direct` +* `storage=[`StoreStateAction`](@ref)`(M; store_fields=[:Iterate, :Gradient])`` +* `stepsize_reduction=0.5` +* `sufficient_decrease=1e-4` +* `stop_when_stepsize_less=0.0` +* `stop_when_stepsize_exceeds=`[`max_stepsize`](@ref)`(M, p)`) +* `stop_increasing_at_step=100` +* `stop_decreasing_at_step=1000` +* $_kw_vector_transport_method_default + +The constructor return the functor to perform nonmonotone line search. """ mutable struct NonmonotoneLinesearch{ TRM<:AbstractRetractionMethod, @@ -690,7 +719,7 @@ end function (a::NonmonotoneLinesearch)( mp::AbstractManoptProblem, s::AbstractManoptSolverState, - i::Int, + k::Int, η=-get_gradient(mp, get_iterate(s)); kwargs..., ) @@ -712,7 +741,7 @@ function (a::NonmonotoneLinesearch)( η, p_old, X_old, - i, + k, ) end function (a::NonmonotoneLinesearch)( @@ -850,7 +879,7 @@ function PolyakStepsize(; γ::F=(i) -> 1 / i, initial_cost_estimate::R=0.0) wher return PolyakStepsize{F,R}(γ, initial_cost_estimate) end function (ps::PolyakStepsize)( - amp::AbstractManoptProblem, ams::AbstractManoptSolverState, i::Int, args...; kwargs... + amp::AbstractManoptProblem, ams::AbstractManoptSolverState, k::Int, args...; kwargs... 
) M = get_manifold(amp) p = get_iterate(ams) @@ -858,7 +887,7 @@ function (ps::PolyakStepsize)( # Evaluate the cost c = get_cost(M, get_objective(amp), p) (c < ps.best_cost_value) && (ps.best_cost_value = c) - α = (c - ps.best_cost_value + ps.γ(i)) / (norm(M, p, X)^2) + α = (c - ps.best_cost_value + ps.γ(k)) / (norm(M, p, X)^2) return α end function show(io::IO, ps::PolyakStepsize) @@ -897,13 +926,13 @@ Generate a Wolfe-Powell line search ## Keyword arguments -* `candidate_point`: (`allocate_result(M, rand)`) memory for a candidate -* `candidate_tangent`: (`allocate_result(M, zero_vector, candidate_point)`) memory for a gradient -* `candidate_direcntion`: (`allocate_result(M, zero_vector, candidate_point)`) memory for a direction +* `candidate_point=allocate_result(M, rand)`: memory for a candidate +* `candidate_tangent=allocate_result(M, zero_vector, candidate_point)`: memory for a gradient +* `candidate_direcntion=allocate_result(M, zero_vector, candidate_point)`: memory for a direction * `max_stepsize`: ([`max_stepsize`](@ref)`(M, p)`) largest stepsize allowed here. -* `retraction_method`: (`ExponentialRetraction()`) the retraction to use -* `stop_when_stepsize_less`: (`0.0`) smallest stepsize when to stop (the last one before is taken) -* `vector_transport_method`: (`ParallelTransport()`) the vector transport method to use +* `retraction_method=ExponentialRetraction()`: the retraction to use +* `stop_when_stepsize_less=0.0`: smallest stepsize when to stop (the last one before is taken) +* `vector_transport_method=ParallelTransport()`: the vector transport method to use """ mutable struct WolfePowellLinesearch{ TRM<:AbstractRetractionMethod,VTM<:AbstractVectorTransportMethod,P,T @@ -1040,14 +1069,7 @@ function status_summary(a::WolfePowellLinesearch) return "$a$s" end -@doc raw""" - WolfePowellBinaryLinesearch <: Linesearch - -A [`Linesearch`](@ref) method that determines a step size `t` fulfilling the Wolfe conditions - -based on a binary chop. 
Let ``η`` be a search direction and ``c1,c_2>0`` be two constants. -Then with - +_doc_WPBL_algorithm = raw"""Then with ```math A(t) = f(x_+) ≤ c1 t ⟨\operatorname{grad}f(x), η⟩_{x} \quad\text{and}\quad @@ -1063,44 +1085,48 @@ Then the following Algorithm is performed similar to Algorithm 7 from [Huang:201 3. If ``A(t)`` fails, set ``β=t``. 4. If ``A(t)`` holds but ``W(t)`` fails, set ``α=t``. 5. If ``β<∞`` set ``t=\frac{α+β}{2}``, otherwise set ``t=2α``. +""" + +@doc """ + WolfePowellBinaryLinesearch <: Linesearch + +A [`Linesearch`](@ref) method that determines a step size `t` fulfilling the Wolfe conditions + +based on a binary chop. Let ``η`` be a search direction and ``c1,c_2>0`` be two constants. + +$_doc_WPBL_algorithm # Constructors -There exist two constructors, where, when provided the manifold `M` as a first (optional) -parameter, its default retraction and vector transport are the default. -In this case the retraction and the vector transport are also keyword arguments for ease of use. -The other constructor is kept for backward compatibility. + WolfePowellLinesearch(M=DefaultManifold(), c1=10^(-4), c2=0.999; kwargs...) 
- WolfePowellLinesearch( - M=DefaultManifold(), - c1::Float64=10^(-4), - c2::Float64=0.999; - retraction_method = default_retraction_method(M), - vector_transport_method = default_vector_transport(M), - linesearch_stopsize = 0.0 - ) +## Keyword arguments + +* `linesearch_stopsize = 0.0`: a numerical barrier when to stop due to underflow +* $_kw_retraction_method_default: + $_kw_retraction_method +* $_kw_vector_transport_method_default: + $_kw_vector_transport_method """ mutable struct WolfePowellBinaryLinesearch{ - TRM<:AbstractRetractionMethod,VTM<:AbstractVectorTransportMethod + TRM<:AbstractRetractionMethod,VTM<:AbstractVectorTransportMethod,F } <: Linesearch retraction_method::TRM vector_transport_method::VTM - c1::Float64 - c2::Float64 - last_stepsize::Float64 - linesearch_stopsize::Float64 + c1::F + c2::F + last_stepsize::F + linesearch_stopsize::F function WolfePowellBinaryLinesearch( M::AbstractManifold=DefaultManifold(), - c1::Float64=10^(-4), - c2::Float64=0.999; - retraction_method::AbstractRetractionMethod=default_retraction_method(M), - vector_transport_method::AbstractVectorTransportMethod=default_vector_transport_method( - M - ), - linesearch_stopsize::Float64=0.0, - ) - return new{typeof(retraction_method),typeof(vector_transport_method)}( + c1::F=10^(-4), + c2::F=0.999; + retraction_method::RTM=default_retraction_method(M), + vector_transport_method::VTM=default_vector_transport_method(M), + linesearch_stopsize::F=0.0, + ) where {F,RTM<:AbstractRetractionMethod,VTM<:AbstractVectorTransportMethod} + return new{RTM,VTM,F}( retraction_method, vector_transport_method, c1, c2, 0.0, linesearch_stopsize ) end @@ -1203,11 +1229,11 @@ Note that for ``α=0`` this is the Riemannian variant of `WNGRad`. 
# Fields -* `count_threshold::Int`: (`4`) an `Integer` for ``\hat c`` -* `minimal_bound::Float64`: (`1e-4`) for ``b_{\mathrm{min}}`` -* `alternate_bound::Function`: (`(bk, hat_c) -> min(gradient_bound, max(gradient_bound, bk/(3*hat_c)`) +* `count_threshold::Int=4`: an `Integer` for ``\hat c`` +* `minimal_bound::Float64=1e-4`: for ``b_{\mathrm{min}}`` +* `alternate_bound::Function=(bk, hat_c) -> min(gradient_bound, max(gradient_bound, bk/(3*hat_c)`: how to determine ``\hat b_k`` as a function of `(bmin, bk, hat_c) -> hat_bk` -* `gradient_reduction::Float64`: (`0.9`) +* `gradient_reduction::Float64=0.9`: * `gradient_bound` `norm(M, p0, grad_f(M,p0))` the bound ``b_k``. as well as the internal fields @@ -1221,8 +1247,8 @@ as well as the internal fields Where all fields with defaults are keyword arguments and additional keyword arguments are -* `adaptive`: (`true`) switches the `gradient_reduction ``α`` to `0`. -* `evaluation`: (`AllocatingEvaluation()`) specifies whether the gradient (that is used for initialisation only) is mutating or allocating +* `adaptive=true`: switches the `gradient_reduction ``α`` to `0`. +* `evaluation=AllocatingEvaluation()`: specifies whether the gradient (that is used for initialisation only) is mutating or allocating """ mutable struct AdaptiveWNGradient{I<:Integer,R<:Real,F<:Function} <: Stepsize count_threshold::I diff --git a/src/plans/stochastic_gradient_plan.jl b/src/plans/stochastic_gradient_plan.jl index da3f130aab..2f720958ba 100644 --- a/src/plans/stochastic_gradient_plan.jl +++ b/src/plans/stochastic_gradient_plan.jl @@ -85,7 +85,7 @@ end Evaluate all summands gradients ``\{\operatorname{grad}f_i\}_{i=1}^n`` at `p` (in place of `X`). 
-If you use a single function for the stochastic gradient, that works in-place, then `get_gradient` is not available, +If you use a single function for the stochastic gradient, that works in-place, then [`get_gradient`](@ref) is not available, since the length (or number of elements of the gradient) can not be determined. """ function get_gradients( @@ -180,7 +180,7 @@ end Evaluate one of the summands gradients ``\operatorname{grad}f_k``, ``k∈\{1,…,n\}``, at `x` (in place of `Y`). -If you use a single function for the stochastic gradient, that works in-place, then `get_gradient` is not available, +If you use a single function for the stochastic gradient, that works in-place, then [`get_gradient`](@ref) is not available, since the length (or number of elements of the gradient required for allocation) can not be determined. """ function get_gradient( @@ -272,7 +272,7 @@ end Evaluate the complete gradient ``\operatorname{grad} f = \displaystyle\sum_{i=1}^n \operatorname{grad} f_i(p)`` at `p` (in place of `X`). -If you use a single function for the stochastic gradient, that works in-place, then `get_gradient` is not available, +If you use a single function for the stochastic gradient, that works in-place, then [`get_gradient`](@ref) is not available, since the length (or number of elements of the gradient required for allocation) can not be determined. 
""" function get_gradient( diff --git a/src/plans/stopping_criterion.jl b/src/plans/stopping_criterion.jl index b2aeddd9bb..da25f7e669 100644 --- a/src/plans/stopping_criterion.jl +++ b/src/plans/stopping_criterion.jl @@ -88,15 +88,15 @@ mutable struct StopAfter <: StoppingCriterion end end end -function (c::StopAfter)(::AbstractManoptProblem, ::AbstractManoptSolverState, i::Int) - if value(c.start) == 0 || i <= 0 # (re)start timer +function (c::StopAfter)(::AbstractManoptProblem, ::AbstractManoptSolverState, k::Int) + if value(c.start) == 0 || k <= 0 # (re)start timer c.at_iteration = -1 c.start = Nanosecond(time_ns()) c.time = Nanosecond(0) else c.time = Nanosecond(time_ns()) - c.start - if i > 0 && (c.time > Nanosecond(c.threshold)) - c.at_iteration = i + if k > 0 && (c.time > Nanosecond(c.threshold)) + c.at_iteration = k return true end end @@ -149,16 +149,16 @@ initialize the functor to indicate to stop after `maxIter` iterations. mutable struct StopAfterIteration <: StoppingCriterion max_iterations::Int at_iteration::Int - StopAfterIteration(i::Int) = new(i, -1) + StopAfterIteration(k::Int) = new(k, -1) end function (c::StopAfterIteration)( - ::P, ::S, i::Int + ::P, ::S, k::Int ) where {P<:AbstractManoptProblem,S<:AbstractManoptSolverState} - if i == 0 # reset on init + if k == 0 # reset on init c.at_iteration = -1 end - if i >= c.max_iterations - c.at_iteration = i + if k >= c.max_iterations + c.at_iteration = k return true end return false @@ -241,21 +241,21 @@ function StopWhenChangeLess( ε, zero(ε), storage, inverse_retraction_method, -1 ) end -function (c::StopWhenChangeLess)(mp::AbstractManoptProblem, s::AbstractManoptSolverState, i) - if i == 0 # reset on init +function (c::StopWhenChangeLess)(mp::AbstractManoptProblem, s::AbstractManoptSolverState, k) + if k == 0 # reset on init c.at_iteration = -1 end if has_storage(c.storage, PointStorageKey(:Iterate)) M = get_manifold(mp) p_old = get_storage(c.storage, PointStorageKey(:Iterate)) c.last_change = 
distance(M, get_iterate(s), p_old, c.inverse_retraction) - if c.last_change < c.threshold && i > 0 - c.at_iteration = i - c.storage(mp, s, i) + if c.last_change < c.threshold && k > 0 + c.at_iteration = k + c.storage(mp, s, k) return true end end - c.storage(mp, s, i) + c.storage(mp, s, k) return false end function get_reason(c::StopWhenChangeLess) @@ -305,14 +305,14 @@ mutable struct StopWhenCostLess{F} <: StoppingCriterion end end function (c::StopWhenCostLess)( - p::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int + p::AbstractManoptProblem, s::AbstractManoptSolverState, k::Int ) - if i == 0 # reset on init + if k == 0 # reset on init c.at_iteration = -1 end c.last_cost = get_cost(p, get_iterate(s)) if c.last_cost < c.threshold - c.at_iteration = i + c.at_iteration = k return true end return false @@ -389,21 +389,21 @@ function StopWhenEntryChangeLess( end function (sc::StopWhenEntryChangeLess)( - mp::AbstractManoptProblem, s::AbstractManoptSolverState, i + mp::AbstractManoptProblem, s::AbstractManoptSolverState, k ) - if i == 0 # reset on init + if k == 0 # reset on init sc.at_iteration = -1 end if has_storage(sc.storage, sc.field) old_field_value = get_storage(sc.storage, sc.field) sc.last_change = sc.distance(mp, s, old_field_value, getproperty(s, sc.field)) - if (i > 0) && (sc.last_change < sc.threshold) - sc.at_iteration = i - sc.storage(mp, s, i) + if (k > 0) && (sc.last_change < sc.threshold) + sc.at_iteration = k + sc.storage(mp, s, k) return true end end - sc.storage(mp, s, i) + sc.storage(mp, s, k) return false end function get_reason(sc::StopWhenEntryChangeLess) @@ -480,10 +480,10 @@ function StopWhenGradientChangeLess( return StopWhenGradientChangeLess(DefaultManifold(1), ε; storage=storage, kwargs...) 
end function (c::StopWhenGradientChangeLess)( - mp::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int + mp::AbstractManoptProblem, s::AbstractManoptSolverState, k::Int ) M = get_manifold(mp) - if i == 0 # reset on init + if k == 0 # reset on init c.at_iteration = -1 end if has_storage(c.storage, PointStorageKey(:Iterate)) && @@ -494,13 +494,13 @@ function (c::StopWhenGradientChangeLess)( p = get_iterate(s) Xt = vector_transport_to(M, p_old, X_old, p, c.vector_transport_method) c.last_change = norm(M, p, Xt - get_gradient(s)) - if c.last_change < c.threshold && i > 0 - c.at_iteration = i - c.storage(mp, s, i) + if c.last_change < c.threshold && k > 0 + c.at_iteration = k + c.storage(mp, s, k) return true end end - c.storage(mp, s, i) + c.storage(mp, s, k) return false end function get_reason(c::StopWhenGradientChangeLess) @@ -567,16 +567,16 @@ mutable struct StopWhenGradientNormLess{F,TF} <: StoppingCriterion end function (sc::StopWhenGradientNormLess)( - mp::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int + mp::AbstractManoptProblem, s::AbstractManoptSolverState, k::Int ) M = get_manifold(mp) - if i == 0 # reset on init + if k == 0 # reset on init sc.at_iteration = -1 end - if (i > 0) + if (k > 0) sc.last_change = sc.norm(M, get_iterate(s), get_gradient(s)) if sc.last_change < sc.threshold - sc.at_iteration = i + sc.at_iteration = k return true end end @@ -631,14 +631,14 @@ mutable struct StopWhenStepsizeLess{F} <: StoppingCriterion end end function (c::StopWhenStepsizeLess)( - p::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int + p::AbstractManoptProblem, s::AbstractManoptSolverState, k::Int ) - if i == 0 # reset on init + if k == 0 # reset on init c.at_iteration = -1 end - c.last_stepsize = get_last_stepsize(p, s, i) - if c.last_stepsize < c.threshold && i > 0 - c.at_iteration = i + c.last_stepsize = get_last_stepsize(p, s, k) + if c.last_stepsize < c.threshold && k > 0 + c.at_iteration = k return true end return false @@ -683,14 
+683,14 @@ mutable struct StopWhenCostNaN <: StoppingCriterion StopWhenCostNaN() = new(-1) end function (c::StopWhenCostNaN)( - p::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int + p::AbstractManoptProblem, s::AbstractManoptSolverState, k::Int ) - if i == 0 # reset on init + if k == 0 # reset on init c.at_iteration = -1 end - # but still check + # but still verify whether it yields NaN if isnan(get_cost(p, get_iterate(s))) - c.at_iteration = i + c.at_iteration = k return true end return false @@ -726,12 +726,12 @@ mutable struct StopWhenIterateNaN <: StoppingCriterion StopWhenIterateNaN() = new(-1) end function (c::StopWhenIterateNaN)( - p::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int + p::AbstractManoptProblem, s::AbstractManoptSolverState, k::Int ) - if i == 0 # reset on init + if k == 0 # reset on init c.at_iteration = -1 end - if (i >= 0) && any(isnan.(get_iterate(s))) + if (k >= 0) && any(isnan.(get_iterate(s))) c.at_iteration = 0 return true end @@ -777,13 +777,13 @@ mutable struct StopWhenSmallerOrEqual{R} <: StoppingCriterion end end function (c::StopWhenSmallerOrEqual)( - ::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int + ::AbstractManoptProblem, s::AbstractManoptSolverState, k::Int ) - if i == 0 # reset on init + if k == 0 # reset on init c.at_iteration = -1 end if getfield(s, c.value) <= c.minValue - c.at_iteration = i + c.at_iteration = k return true end return false @@ -824,15 +824,15 @@ mutable struct StopWhenSubgradientNormLess{R} <: StoppingCriterion StopWhenSubgradientNormLess(ε::R) where {R<:Real} = new{R}(-1, ε, zero(ε)) end function (c::StopWhenSubgradientNormLess)( - mp::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int + mp::AbstractManoptProblem, s::AbstractManoptSolverState, k::Int ) M = get_manifold(mp) - if (i == 0) # reset on init + if (k == 0) # reset on init c.at_iteration = -1 end c.value = norm(M, get_iterate(s), get_subgradient(s)) - if (c.value < c.threshold) && (i > 0) - 
c.at_iteration = i + if (c.value < c.threshold) && (k > 0) + c.at_iteration = k return true end return false @@ -888,10 +888,10 @@ mutable struct StopWhenAll{TCriteria<:Tuple} <: StoppingCriterionSet StopWhenAll(c::Vector{StoppingCriterion}) = new{typeof(tuple(c...))}(tuple(c...), -1) StopWhenAll(c...) = new{typeof(c)}(c, -1) end -function (c::StopWhenAll)(p::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int) - (i == 0) && (c.at_iteration = -1) # reset on init - if all(subC -> subC(p, s, i), c.criteria) - c.at_iteration = i +function (c::StopWhenAll)(p::AbstractManoptProblem, s::AbstractManoptSolverState, k::Int) + (k == 0) && (c.at_iteration = -1) # reset on init + if all(subC -> subC(p, s, k), c.criteria) + c.at_iteration = k return true end return false @@ -982,10 +982,10 @@ end end end -function (c::StopWhenAny)(p::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int) - (i == 0) && (c.at_iteration = -1) # reset on init - if _fast_any(subC -> subC(p, s, i), c.criteria) - c.at_iteration = i +function (c::StopWhenAny)(p::AbstractManoptProblem, s::AbstractManoptSolverState, k::Int) + (k == 0) && (c.at_iteration = -1) # reset on init + if _fast_any(subC -> subC(p, s, k), c.criteria) + c.at_iteration = k return true end return false diff --git a/src/solvers/ChambollePock.jl b/src/solvers/ChambollePock.jl index 216f3b1aa9..7c405bfb35 100644 --- a/src/solvers/ChambollePock.jl +++ b/src/solvers/ChambollePock.jl @@ -1,34 +1,35 @@ -@doc raw""" +@doc """ ChambollePockState <: AbstractPrimalDualSolverState stores all options and variables within a linearized or exact Chambolle Pock. -The following list provides the order for the constructor, where the previous iterates are -initialized automatically and values with a default may be left out. 
- -* `m`: base point on ``\mathcal M`` -* `n`: base point on ``\mathcal N`` -* `p`: an initial point on ``x^{(0)} ∈\mathcal M`` (and its previous iterate) -* `X`: an initial tangent vector ``X^{(0)}∈T^*\mathcal N`` (and its previous iterate) -* `pbar`: the relaxed iterate used in the next dual update step (when using `:primal` relaxation) -* `Xbar`: the relaxed iterate used in the next primal update step (when using `:dual` relaxation) -* `primal_stepsize`: (`1/sqrt(8)`) proximal parameter of the primal prox -* `dual_stepsize`: (`1/sqrt(8)`) proximal parameter of the dual prox -* `acceleration`: (`0.`) acceleration factor due to Chambolle & Pock -* `relaxation`: (`1.`) relaxation in the primal relaxation step (to compute `pbar`) -* `relax`: (`:primal`) which variable to relax (`:primal` or `:dual`) -* `stop`: a [`StoppingCriterion`](@ref) -* `variant`: (`exact`) whether to perform an `:exact` or `:linearized` Chambolle-Pock -* `update_primal_base`: (`(p,o,i) -> o.m`) function to update the primal base -* `update_dual_base`: (`(p,o,i) -> o.n`) function to update the dual base -* `retraction_method`: (`default_retraction_method(M, typeof(p))`) the retraction to use -* `inverse_retraction_method`: (`default_inverse_retraction_method(M, typeof(p))`) an inverse - retraction to use on the manifold ``\mathcal M``. -* `inverse_retraction_method_dual`: (`default_inverse_retraction_method(N, typeof(n))`) - an inverse retraction to use on manifold ``\mathcal N``. -* `vector_transport_method`: (`default_vector_transport_method(M, typeof(p))`) a vector transport to - use on the manifold ``\mathcal M``. -* `vector_transport_method_dual`: (`default_vector_transport_method(N, typeof(n))`) a - vector transport to use on manifold ``\mathcal N``. 
+ +# Fields + +* `acceleration::R`: acceleration factor +* `dual_stepsize::R`: proximal parameter of the dual prox +* $(_field_inv_retr) +* `inverse_retraction_method_dual::`[`AbstractInverseRetractionMethod`](@extref `ManifoldsBase.AbstractInverseRetractionMethod`): + an inverse retraction ``$(_l_retr)^{-1}`` on ``$(_l_Manifold("N"))`` +* `m::P`: base point on ``$(_l_M)`` +* `n::Q`: base point on ``$(_l_Manifold("N"))`` +* `p::P`: an initial point on ``p^{(0)} ∈ $(_l_M)`` +* `pbar::P`: the relaxed iterate used in the next dual update step (when using `:primal` relaxation) +* `primal_stepsize::R`: proximal parameter of the primal prox +* `X::T`: an initial tangent vector ``X^{(0)} ∈ T_{p^{(0)}}$(_l_M)`` +* `Xbar::T`: the relaxed iterate used in the next primal update step (when using `:dual` relaxation) +* `relaxation::R`: relaxation in the primal relaxation step (to compute `pbar`) +* `relax::Symbol`: which variable to relax (`:primal` or `:dual`) +* $(_field_retr) +* `stop`: a [`StoppingCriterion`](@ref) +* `variant`: whether to perform an `:exact` or `:linearized` Chambolle-Pock +* `update_primal_base`: function `(pr, st, k) -> m` to update the primal base +* `update_dual_base`: function `(pr, st, k) -> n` to update the dual base +* $(_field_vector_transp) +* `vector_transport_method_dual::`[`AbstractVectorTransportMethod`](@extref `ManifoldsBase.AbstractVectorTransportMethod`): + a vector transport ``$(_l_vt)`` on ``$(_l_Manifold("N"))`` + +Here, `P` is a point type on ``$(_l_M)``, `T` its tangent vector type, `Q` a point type on ``$(_l_Manifold("N"))``, +and `R<:Real` is a real number type, where for the last two functions an [`AbstractManoptProblem`](@ref)` p`, [`AbstractManoptSolverState`](@ref)` o` and the current iterate `i` are the arguments.
@@ -38,11 +39,28 @@ If you activate these to be different from the default identity, you have to pro # Constructor ChambollePockState(M::AbstractManifold, N::AbstractManifold, - m::P, n::Q, p::P, X::T, primal_stepsize::Float64, dual_stepsize::Float64; + m::P, n::Q, p::P, X::T, primal_stepsize::R, dual_stepsize::R; kwargs... - ) + ) where {P, Q, T, R <: Real} + +# Keyword arguments -where all other fields are keyword arguments with their default values given in brackets. +* `acceleration=0.0` +* `dual_stepsize=1/sqrt(8)` +* `primal_stepsize=1/sqrt(8)` +* $_kw_inverse_retraction_method_default: $_kw_inverse_retraction_method +* `inverse_retraction_method_dual=`[`default_inverse_retraction_method`](@extref `ManifoldsBase.default_inverse_retraction_method-Tuple{AbstractManifold}`)`(N, typeof(n))` + an inverse retraction ``$(_l_retr)^{-1}`` to use on ``$(_l_Manifold("N"))``, see [the section on retractions and their inverses](@extref ManifoldsBase :doc:`retractions`). +* `relaxation=1.0` +* `relax=:primal`: relax the primal variable by default +* $_kw_retraction_method_default: $_kw_retraction_method +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(300)` +* `variant=:exact`: run the exact Chambolle Pock by default +* `update_primal_base=missing` +* `update_dual_base=missing` +* $_kw_vector_transport_method_default: $_kw_vector_transport_method +* `vector_transport_method=`[`default_vector_transport_method`](@extref `ManifoldsBase.default_vector_transport_method-Tuple{AbstractManifold}`)`(N, typeof(n))`: + a vector transport ``$(_l_vt)`` to use on ``$(_l_Manifold("N"))``, see [the section on vector transports](@extref ManifoldsBase :doc:`vector_transports`). if `Manifolds.jl` is loaded, `N` is also a keyword argument and set to `TangentBundle(M)` by default. 
""" @@ -50,6 +68,8 @@ mutable struct ChambollePockState{ P, Q, T, + R, + SC<:StoppingCriterion, RM<:AbstractRetractionMethod, IRM<:AbstractInverseRetractionMethod, IRM_Dual<:AbstractInverseRetractionMethod, @@ -62,12 +82,12 @@ mutable struct ChambollePockState{ pbar::P X::T Xbar::T - primal_stepsize::Float64 - dual_stepsize::Float64 - acceleration::Float64 - relaxation::Float64 + primal_stepsize::R + dual_stepsize::R + acceleration::R + relaxation::R relax::Symbol - stop::StoppingCriterion + stop::SC variant::Symbol update_primal_base::Union{Function,Missing} update_dual_base::Union{Function,Missing} @@ -84,12 +104,12 @@ function Manopt.ChambollePockState( n::Q, p::P, X::T; - primal_stepsize::Float64=1 / sqrt(8), - dual_stepsize::Float64=1 / sqrt(8), - acceleration::Float64=0.0, - relaxation::Float64=1.0, + primal_stepsize::R=1 / sqrt(8), + dual_stepsize::R=1 / sqrt(8), + acceleration::R=0.0, + relaxation::R=1.0, relax::Symbol=:primal, - stopping_criterion::StoppingCriterion=StopAfterIteration(300), + stopping_criterion::SC=StopAfterIteration(300), variant::Symbol=:exact, update_primal_base::Union{Function,Missing}=missing, update_dual_base::Union{Function,Missing}=missing, @@ -104,13 +124,15 @@ function Manopt.ChambollePockState( P, Q, T, + R, + SC<:StoppingCriterion, RM<:AbstractRetractionMethod, IRM<:AbstractInverseRetractionMethod, IRM_Dual<:AbstractInverseRetractionMethod, VTM<:AbstractVectorTransportMethod, VTM_Dual<:AbstractVectorTransportMethod, } - return ChambollePockState{P,Q,T,RM,IRM,IRM_Dual,VTM,VTM_Dual}( + return ChambollePockState{P,Q,T,R,SC,RM,IRM,IRM_Dual,VTM,VTM_Dual}( m, n, p, @@ -166,27 +188,36 @@ function set_iterate!(apds::AbstractPrimalDualSolverState, p) return apds end -@doc raw""" - ChambollePock( - M, N, cost, x0, ξ0, m, n, prox_F, prox_G_dual, adjoint_linear_operator; - forward_operator=missing, - linearized_forward_operator=missing, - evaluation=AllocatingEvaluation() - ) - -Perform the Riemannian Chambolle—Pock algorithm. 
- +_doc_ChambollePock_formula = raw""" Given a `cost` function ``\mathcal E:\mathcal M → ℝ`` of the form ```math -\mathcal E(p) = F(p) + G( Λ(p) ), +\mathcal f(p) = F(p) + G( Λ(p) ), ``` where ``F:\mathcal M → ℝ``, ``G:\mathcal N → ℝ``, -and ``Λ:\mathcal M → \mathcal N``. The remaining input parameters are +and ``Λ:\mathcal M → \mathcal N``. +""" -* `p, X`: primal and dual start points ``x∈\mathcal M`` and ``ξ∈T_n\mathcal N`` -* `m,n`: base points on ``\mathcal M`` and ``\mathcal N``, respectively. -* `adjoint_linearized_operator`: the adjoint ``DΛ^*`` of the linearized operator ``DΛ(m): T_{m}\mathcal M → T_{Λ(m)}\mathcal N`` -* `prox_F, prox_G_Dual`: the proximal maps of ``F`` and ``G^\ast_n`` +_doc_ChambollePock = """ + ChambollePock(M, N, f, p, X, m, n, prox_G, prox_G_dual, adjoint_linear_operator; kwargs...) + ChambollePock!(M, N, f, p, X, m, n, prox_G, prox_G_dual, adjoint_linear_operator; kwargs...) + + +Perform the Riemannian Chambolle—Pock algorithm. + +$_doc_ChambollePock_formula + +This can be done inplace of ``p``. + + # Input parameters + + $_arg_M + * `N`, a manifold ``$(_l_Manifold("N"))`` +$_arg_p +$_arg_X +* `m`, a base point on $_l_M +* `n`, a base point on $(_l_Manifold("N")) +* `adjoint_linearized_operator`: the adjoint ``DΛ^*`` of the linearized operator ``$_l_DΛ`` +* `prox_F, prox_G_Dual`: the proximal maps of ``F`` and ``G^\\ast_n`` note that depending on the [`AbstractEvaluationType`](@ref) `evaluation` the last three parameters as well as the forward operator `Λ` and the `linearized_forward_operator` can be given as @@ -198,31 +229,33 @@ By default, this performs the exact Riemannian Chambolle Pock algorithm, see the For more details on the algorithm, see [BergmannHerzogSilvaLouzeiroTenbrinckVidalNunez:2021](@cite). 
-# Optional parameters - -* `acceleration`: (`0.05`) -* `dual_stepsize`: (`1/sqrt(8)`) proximal parameter of the primal prox -* `evaluation`: ([`AllocatingEvaluation`](@ref)`()) specify whether the proximal maps - and operators are allocating functions `(Manifolds, parameters) -> result` or - given as mutating functions `(Manifold, result, parameters)` -> result` -* `Λ`: (`missing`) the (forward) operator ``Λ(⋅)`` (required for the `:exact` variant) -* `linearized_forward_operator`: (`missing`) its linearization ``DΛ(⋅)[⋅]`` (required for the `:linearized` variant) -* `primal_stepsize`: (`1/sqrt(8)`) proximal parameter of the dual prox -* `relaxation`: (`1.`) the relaxation parameter ``γ`` -* `relax`: (`:primal`) whether to relax the primal or dual -* `variant`: (`:exact` if `Λ` is missing, otherwise `:linearized`) variant to use. - Note that this changes the arguments the `forward_operator` is called with. -* `stopping_criterion`: (`[StopAfterIteration`](@ref)`(100)`) a [`StoppingCriterion`](@ref) -* `update_primal_base`: (`missing`) function to update `m` (identity by default/missing) -* `update_dual_base`: (`missing`) function to update `n` (identity by default/missing) -* `retraction_method`: (`default_retraction_method(M, typeof(p))`) the retraction to use -* `inverse_retraction_method` (`default_inverse_retraction_method(M, typeof(p))`) an inverse retraction to use. 
-* `vector_transport_method` (`default_vector_transport_method(M, typeof(p))`) a vector transport to use +# Keyword Arguments -# Output +* `acceleration=0.05`: acceleration parameter +* `dual_stepsize=1/sqrt(8)`: proximal parameter of the dual prox +* $_kw_evaluation_default: $_kw_evaluation +* $_kw_inverse_retraction_method_default: $_kw_inverse_retraction_method +* `inverse_retraction_method_dual=`[`default_inverse_retraction_method`](@extref `ManifoldsBase.default_inverse_retraction_method-Tuple{AbstractManifold}`)`(N, typeof(n))`: + an inverse retraction ``$(_l_retr)^{-1}`` to use on $(_l_Manifold("N")), see [the section on retractions and their inverses](@extref ManifoldsBase :doc:`retractions`). +* `Λ=missing`: the (forward) operator ``Λ(⋅)`` (required for the `:exact` variant) +* `linearized_forward_operator=missing`: its linearization ``DΛ(⋅)[⋅]`` (required for the `:linearized` variant) +* `primal_stepsize=1/sqrt(8)`: proximal parameter of the primal prox +* `relaxation=1.`: the relaxation parameter ``γ`` +* `relax=:primal`: whether to relax the primal or dual +* `variant=:exact` if `Λ` is missing, otherwise `:linearized`: variant to use. + Note that this changes the arguments the `forward_operator` is called with. +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(100)`: $_kw_stopping_criterion +* `update_primal_base=missing`: function to update `m` (identity by default/missing) +* `update_dual_base=missing`: function to update `n` (identity by default/missing) +* $_kw_retraction_method_default: $_kw_retraction_method +* $_kw_vector_transport_method_default: $_kw_vector_transport_method +* `vector_transport_method_dual=`[`default_vector_transport_method`](@extref `ManifoldsBase.default_vector_transport_method-Tuple{AbstractManifold}`)`(N, typeof(n))`: + a vector transport ``$_l_vt`` to use on $(_l_Manifold("N")), see [the section on vector transports](@extref ManifoldsBase :doc:`vector_transports`).
-the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details. +$_doc_sec_output """ + +@doc "$(_doc_ChambollePock)" function ChambollePock( M::AbstractManifold, N::AbstractManifold, @@ -258,12 +291,8 @@ function ChambollePock( kwargs..., ) end -@doc raw""" - ChambollePock(M, N, cost, x0, ξ0, m, n, prox_F, prox_G_dual, adjoint_linear_operator) -Perform the Riemannian Chambolle—Pock algorithm in place of `x`, `ξ`, and potentially `m`, -`n` if they are not fixed. See [`ChambollePock`](@ref) for details and optional parameters. -""" +@doc "$(_doc_ChambollePock)" function ChambollePock!( M::AbstractManifold, N::AbstractManifold, diff --git a/src/solvers/DouglasRachford.jl b/src/solvers/DouglasRachford.jl index 72fea055d8..1bafb772ed 100644 --- a/src/solvers/DouglasRachford.jl +++ b/src/solvers/DouglasRachford.jl @@ -1,38 +1,46 @@ -@doc raw""" +@doc """ DouglasRachfordState <: AbstractManoptSolverState Store all options required for the DouglasRachford algorithm, # Fields -* `p`: the current iterate (result) For the parallel Douglas-Rachford, - this is not a value from the `PowerManifold` manifold but the mean. -* `s`: the last result of the double reflection at the proximal maps relaxed by `α`. -* `λ`: function to provide the value for the proximal parameter during the calls + * `α`: relaxation of the step from old to new iterate, to be precise ``x^{(k+1)} = g(α(k); x^{(k)}, t^{(k)})``, where ``t^{(k)}`` is the result of the double reflection involved in the DR algorithm * `inverse_retraction_method`: an inverse retraction method +* `λ`: function to provide the value for the proximal parameter during the calls +* `parallel`: indicate whether to use a parallel Douglas-Rachford or not. * `R`: method employed in the iteration to perform the reflection of `x` at the prox `p`. +* $(_field_iterate) + For the parallel Douglas-Rachford, this is not a value from the `PowerManifold` manifold but the mean. 
* `reflection_evaluation`: whether `R` works in-place or allocating -* `retraction_method`: a retraction method -* `stop`: a [`StoppingCriterion`](@ref) -* `parallel`: indicate whether to use a parallel Douglas-Rachford or not. +* $(_field_retr) +* `s`: the last result of the double reflection at the proximal maps relaxed by `α`. +* $(_field_stop) # Constructor DouglasRachfordState(M, p; kwargs...) -Generate the options for a Manifold `M` and an initial point `p`, where the following keyword arguments can be used +# Input + +* $(_arg_M) +* $(_arg_p) -* `λ`: (`(iter)->1.0`) function to provide the value for the proximal parameter - during the calls -* `α`: (`(iter)->0.9`) relaxation of the step from old to new iterate, to be precise +# Keyword arguments + +* `α= k -> 0.9`: relaxation of the step from old to new iterate, to be precise ``x^{(k+1)} = g(α(k); x^{(k)}, t^{(k)})``, where ``t^{(k)}`` is the result of the double reflection involved in the DR algorithm -* `R`: ([`reflect`](@ref) or `reflect!`) method employed in the iteration to perform the reflection of `x` at - the prox `p`, which function is used depends on `reflection_evaluation`. -* `reflection_evaluation`: ([`AllocatingEvaluation`](@ref)`()`) specify whether the reflection works in-place or allocating (default) -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(300)`) a [`StoppingCriterion`](@ref) -* `parallel`: (`false`) indicate whether to use a parallel Douglas-Rachford or not. +* $(_kw_inverse_retraction_method_default): $(_kw_inverse_retraction_method) +* `λ= k -> 1.0`: function to provide the value for the proximal parameter + during the calls +* `R=`[`reflect`](@ref)`(!)`: method employed in the iteration to perform the reflection of `p` at + the prox of `p`, which function is used depends on `reflection_evaluation`. 
+* `reflection_evaluation=`[`AllocatingEvaluation`](@ref)`()`: specify whether the reflection works in-place or allocating (default) +* $(_kw_retraction_method_default): $(_kw_retraction_method) +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(300)`: $(_kw_stopping_criterion) +* `parallel=false`: indicate whether to use a parallel Douglas-Rachford or not. """ mutable struct DouglasRachfordState{ P, @@ -61,8 +69,14 @@ mutable struct DouglasRachfordState{ p::P; λ::Fλ=i -> 1.0, α::Fα=i -> 0.9, - R::FR=Manopt.reflect, reflection_evaluation::E=AllocatingEvaluation(), + R::FR=( + if reflection_evaluation isa AllocatingEvaluation + Manopt.reflect + else + Manopt.reflect! + end + ), stopping_criterion::S=StopAfterIteration(300), parallel=false, retraction_method::TM=default_retraction_method(M, typeof(p)), @@ -117,68 +131,74 @@ function set_iterate!(drs::DouglasRachfordState, p) end function (d::DebugProximalParameter)( - ::AbstractManoptProblem, cpps::DouglasRachfordState, i::Int + ::AbstractManoptProblem, cpps::DouglasRachfordState, k::Int ) - (i > 0) && Printf.format(d.io, Printf.Format(d.format), cpps.λ(i)) + (k > 0) && Printf.format(d.io, Printf.Format(d.format), cpps.λ(k)) return nothing end function (r::RecordProximalParameter)( - ::AbstractManoptProblem, cpps::DouglasRachfordState, i::Int + ::AbstractManoptProblem, cpps::DouglasRachfordState, k::Int ) - return record_or_reset!(r, cpps.λ(i), i) + return record_or_reset!(r, cpps.λ(k), k) end -@doc raw""" +_doc_Douglas_Rachford = """ DouglasRachford(M, f, proxes_f, p) DouglasRachford(M, mpo, p) + DouglasRachford!(M, f, proxes_f, p) + DouglasRachford!(M, mpo, p) -Compute the Douglas-Rachford algorithm on the manifold ``\mathcal M``, initial -data ``p`` and the (two) proximal maps `proxMaps`, see [ BergmannPerschSteidl:2016](@cite). +Compute the Douglas-Rachford algorithm on the manifold ``$(_l_M)``, starting from `p` +given the (two) proximal maps `proxes_f`, see [BergmannPerschSteidl:2016](@cite).
For ``k>2`` proximal maps, the problem is reformulated using the parallel Douglas Rachford: -a vectorial proximal map on the power manifold ``\mathcal M^k`` is introduced as the first -proximal map and the second proximal map of the is set to the `mean` (Riemannian Center of mass). +a vectorial proximal map on the power manifold ``$(_l_M)^k`` is introduced as the first +proximal map and the second proximal map is set to the [`mean`](@extref Statistics.mean-Tuple{AbstractManifold, Vararg{Any}}) (Riemannian center of mass). This hence also boils down to two proximal maps, though each evaluates proximal maps in parallel, that is, component wise in a vector. +!!! note + + The parallel Douglas Rachford does not work in-place for now, since + while creating the new starting point `p'` on the power manifold, a copy of `p` + is created. + If you provide a [`ManifoldProximalMapObjective`](@ref) `mpo` instead, the proximal maps are kept unchanged. # Input -* `M`: a Riemannian Manifold ``\mathcal M`` -* `F`: a cost function consisting of a sum of cost functions + +* $(_arg_M) +* $(_arg_f) * `proxes_f`: functions of the form `(M, λ, p)-> q` performing a proximal maps, where `⁠λ` denotes the proximal parameter, for each of the summands of `F`. These can also be given in the [`InplaceEvaluation`](@ref) variants `(M, q, λ p) -> q` computing in place of `q`.
-* `p`: initial data ``p ∈ \mathcal M`` +* $(_arg_p) -# Optional values +# Keyword arguments -* `evaluation`: ([`AllocatingEvaluation`](@ref)) specify whether the proximal maps work by allocation (default) form `prox(M, λ, x)` - or [`InplaceEvaluation`](@ref) in-place -* `λ`: (`(iter) -> 1.0`) function to provide the value for the proximal parameter during the calls -* `α`: (`(iter) -> 0.9`) relaxation of the step from old to new iterate, to be precise - ``t_{k+1} = g(α_k; t_k, s_k)``, where ``s_k`` is the result - of the double reflection involved in the DR algorithm -* `inverse_retraction_method` - (`default_inverse_retraction_method(M, typeof(p))`) the inverse retraction to use within - - the reflection (ignored, if you set `R` directly) - - the relaxation step -* `R`: method employed in the iteration to perform the reflection of `x` at the prox `p`. +* `α= k -> 0.9`: relaxation of the step from old to new iterate, to be precise + ``p^{(k+1)} = g(α_k; p^{(k)}, q^{(k)})``, where ``q^{(k)}`` is the result of the double reflection + involved in the DR algorithm and ``g`` is a curve induced by the retraction and its inverse. +* $(_kw_evaluation_default): $(_kw_evaluation) +* $(_kw_inverse_retraction_method_default): $(_kw_inverse_retraction_method) + This is used both in the relaxation step as well as in the reflection, unless you set `R` yourself. +* `λ= k -> 1.0`: function to provide the value for the proximal parameter ``λ_k`` +* `R=reflect(!)`: method employed in the iteration to perform the reflection of `p` at the prox of `p`. This uses by default [`reflect`](@ref) or `reflect!` depending on `reflection_evaluation` and the retraction and inverse retraction specified by `retraction_method` and `inverse_retraction_method`, respectively. 
* `reflection_evaluation`: ([`AllocatingEvaluation`](@ref) whether `R` works in-place or allocating -* `retraction_method`: (`default_retraction_metiod(M, typeof(p))`) the retraction to use in - - the reflection (ignored, if you set `R` directly) - - the relaxation step -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(200) | `[`StopWhenChangeLess`](@ref)`(1e-5)`) - a [`StoppingCriterion`](@ref). -* `parallel`: (`false`) indicate whether to use a parallel Douglas-Rachford or not. - -and the ones that are passed to [`decorate_state!`](@ref) for decorators. +* $(_kw_retraction_method_default): $(_kw_retraction_method) + This is used both in the relaxation step as well as in the reflection, unless you set `R` yourself. +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(200)`$(_sc_any)[`StopWhenChangeLess`](@ref)`(1e-5)`: + $(_kw_stopping_criterion) +* `parallel=false`: indicate whether to use a parallel Douglas-Rachford or not. -# Output +$(_kw_others) -the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details +$(_doc_sec_output) """ + +@doc "$(_doc_Douglas_Rachford)" DouglasRachford(::AbstractManifold, args...; kwargs...) function DouglasRachford( M::AbstractManifold, @@ -221,36 +241,7 @@ function DouglasRachford( return DouglasRachford!(M, mpo, q; kwargs...) end -@doc raw""" - DouglasRachford!(M, f, proxes_f, p) - DouglasRachford!(M, mpo, p) - -Compute the Douglas-Rachford algorithm on the manifold ``\mathcal M``, initial -data ``p ∈ \mathcal M`` and the (two) proximal maps `proxes_f` in place of `p`. - -For ``k>2`` proximal maps, the problem is reformulated using the parallel Douglas Rachford: -a vectorial proximal map on the power manifold ``\mathcal M^k`` is introduced as the first -proximal map and the second proximal map of the is set to the `mean` (Riemannian Center of mass). -This hence also boils down to two proximal maps, though each evaluates proximal maps in parallel, -that is component wise in a vector. - -!!! 
note - - While creating the new staring point `p'` on the power manifold, a copy of `p` - Is created, so that the (by k>2 implicitly generated) parallel Douglas Rachford does - not work in-place for now. - -If you provide a [`ManifoldProximalMapObjective`](@ref) `mpo` instead, the proximal maps are kept unchanged. - -# Input -* `M`: a Riemannian Manifold ``\mathcal M`` -* `f`: a cost function consisting of a sum of cost functions -* `proxes_f`: functions of the form `(M, λ, p)->q` or `(M, q, λ, p)->q` performing a proximal map, - where `⁠λ` denotes the proximal parameter, for each of the summands of `f`. -* `p`: initial point ``p ∈ \mathcal M`` - -For more options, see [`DouglasRachford`](@ref). -""" +@doc "$(_doc_Douglas_Rachford)" DouglasRachford!(::AbstractManifold, args...; kwargs...) function DouglasRachford!( M::AbstractManifold, @@ -377,19 +368,19 @@ function prepare_proxes(proxes_f, parallel, evaluation::AbstractEvaluationType) return prox1, prox2, parallel_ end function initialize_solver!(::AbstractManoptProblem, ::DouglasRachfordState) end -function step_solver!(amp::AbstractManoptProblem, drs::DouglasRachfordState, i) +function step_solver!(amp::AbstractManoptProblem, drs::DouglasRachfordState, k) M = get_manifold(amp) - get_proximal_map!(amp, drs.p_tmp, drs.λ(i), drs.s, 1) + get_proximal_map!(amp, drs.p_tmp, drs.λ(k), drs.s, 1) #dispatch on allocation type for the reflection, see below. 
_reflect!(M, drs.s_tmp, drs.p_tmp, drs.s, drs.R, drs.reflection_evaluation) - get_proximal_map!(amp, drs.p, drs.λ(i), drs.s_tmp, 2) + get_proximal_map!(amp, drs.p, drs.λ(k), drs.s_tmp, 2) _reflect!(M, drs.s_tmp, drs.p, drs.s_tmp, drs.R, drs.reflection_evaluation) # relaxation drs.s = retract( M, drs.s, inverse_retract(M, drs.s, drs.s_tmp, drs.inverse_retraction_method), - drs.α(i), + drs.α(k), drs.retraction_method, ) return drs diff --git a/src/solvers/FrankWolfe.jl b/src/solvers/FrankWolfe.jl index 842899f9e9..56f7ff9087 100644 --- a/src/solvers/FrankWolfe.jl +++ b/src/solvers/FrankWolfe.jl @@ -1,4 +1,11 @@ -@doc raw""" + +_doc_FW_sub = raw""" +```math + \operatorname*{arg\,min}_{q ∈ C} ⟨\operatorname{grad} f(p_k), \log_{p_k}q⟩. +``` +""" + +@doc """ FrankWolfeState <: AbstractManoptSolverState A struct to store the current state of the [`Frank_Wolfe_method`](@ref) @@ -7,24 +14,40 @@ It comes in two forms, depending on the realisation of the `subproblem`. # Fields -* `p`: the current iterate, a point on the manifold -* `X`: the current gradient ``\operatorname{grad} F(p)``, a tangent vector to `p`. -* `inverse_retraction_method`: (`default_inverse_retraction_method(M, typeof(p))`) an inverse retraction method to use within Frank Wolfe. -* `sub_problem`: an [`AbstractManoptProblem`](@ref) problem or a function `(M, p, X) -> q` or `(M, q, p, X)` for the a closed form solution of the sub problem -* `sub_state`: an [`AbstractManoptSolverState`](@ref) for the subsolver or an [`AbstractEvaluationType`](@ref) in case the sub problem is provided as a function -* `stop`: ([`StopAfterIteration`](@ref)`(200) | `[`StopWhenGradientNormLess`](@ref)`(1.0e-6)`) a [`StoppingCriterion`](@ref) -* `stepsize`: ([`DecreasingStepsize`](@ref)`(; length=2.0, shift=2)`) ``s_k`` which by default is set to ``s_k = \frac{2}{k+2}``. 
-* `retraction_method`: (`default_retraction_method(M, typeof(p))`) a retraction to use within Frank-Wolfe +* $_field_iterate +* $_field_gradient +* $_field_inv_retr +* $_field_sub_problem +* $_field_sub_state +* $_field_stop +* $_field_step +* $_field_retr The sub task requires a method to solve -```math - \operatorname*{arg\,min}_{q ∈ C} ⟨\operatorname{grad} f(p_k), \log_{p_k}q⟩. -``` +$_doc_FW_sub # Constructor - FrankWolfeState(M, p, X, sub_problem, sub_state) + FrankWolfeState(M, p, sub_problem, sub_state; kwargs...) + +Initialise the Frank Wolfe method state with. + +## Input + +* $_arg_M +* $_arg_p +* $_arg_X +* $_arg_sub_problem +* $_arg_sub_state + +## Keyword arguments + +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(200)`$_sc_any[`StopWhenGradientNormLess`](@ref)`(1e-6)` $_kw_stop_note +* `stepsize=`[`default_stepsize`](@ref)`(M, FrankWolfeState)` +* $_kw_retraction_method_default +* $_kw_inverse_retraction_method_default +* $_kw_X_default where the remaining fields from before are keyword arguments. 
""" @@ -51,9 +74,9 @@ mutable struct FrankWolfeState{ p::P, sub_problem::Pr, sub_state::Union{AbstractEvaluationType,AbstractManoptSolverState}; - initial_vector::T=zero_vector(M, p), - stopping_criterion::TStop=StopAfterIteration(200) | - StopWhenGradientNormLess(1.0e-6), + initial_vector::T=zero_vector(M, p), #deprecated + X::T=initial_vector, + stopping_criterion::TStop=StopAfterIteration(200) | StopWhenGradientNormLess(1e-6), stepsize::TStep=default_stepsize(M, FrankWolfeState), retraction_method::TM=default_retraction_method(M, typeof(p)), inverse_retraction_method::ITM=default_inverse_retraction_method(M, typeof(p)), @@ -118,24 +141,31 @@ function show(io::IO, fws::FrankWolfeState) return print(io, s) end -@doc raw""" +_doc_FW_problem = raw""" +```math + \operatorname*{arg\,min}_{p∈\mathcal C} f(p), +``` +""" +_doc_FW_sk_default = raw"``s_k = \frac{2}{k+2}``" +_doc_Frank_Wolfe_method = """ Frank_Wolfe_method(M, f, grad_f, p) Frank_Wolfe_method(M, gradient_objective, p; kwargs...) + Frank_Wolfe_method!(M, f, grad_f, p; kwargs...) + Frank_Wolfe_method!(M, gradient_objective, p; kwargs...) -Perform the Frank-Wolfe algorithm to compute for ``\mathcal C \subset \mathcal M`` +Perform the Frank-Wolfe algorithm to compute for ``$_l_C_subset_M`` +the constrained problem -```math - \operatorname*{arg\,min}_{p∈\mathcal C} f(p), -``` +$_doc_FW_problem where the main step is a constrained optimisation is within the algorithm, that is the sub problem (Oracle) -```math - q_k = \operatorname*{arg\,min}_{q ∈ C} ⟨\operatorname{grad} f(p_k), \log_{p_k}q⟩. -``` +$_doc_FW_sub + +for every iterate ``p_k`` together with a stepsize ``s_k≤1``. +The algorithm can be performed in-place of `p`. -for every iterate ``p_k`` together with a stepsize ``s_k≤1``, by default ``s_k = \frac{2}{k+2}``. This algorithm is inspired by but slightly more general than [WeberSra:2022](@cite).
The next iterate is then given by ``p_{k+1} = γ_{p_k,q_k}(s_k)``, @@ -144,48 +174,68 @@ use a retraction and its inverse. # Input -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function ``f: \mathcal M→ℝ`` to find a minimizer ``p^*`` for -* `grad_f`: the gradient ``\operatorname{grad}f: \mathcal M → T\mathcal M`` of f -* `p`: an initial value ``p ∈ \mathcal C``, note that it really has to be a feasible point +$_arg_M +$_arg_f +$_arg_grad_f +$_arg_p -Alternatively to `f` and `grad_f` you can provide -the [`AbstractManifoldGradientObjective`](@ref) `gradient_objective` directly. +$_arg_alt_mgo # Keyword arguments -* `evaluation`: ([`AllocatingEvaluation`](@ref)) whether `grad_f` is an in-place or allocating (default) function -* `initial_vector`: (`zero_vectoir(M,p)`) how to initialize the inner gradient tangent vector -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(500) | `[`StopWhenGradientNormLess`](@ref)`(1.0e-6)`) a stopping criterion -* `retraction_method`: (`default_retraction_method(M, typeof(p))`) a type of retraction -* `stepsize`: ([`DecreasingStepsize`](@ref)`(; length=2.0, shift=2)` a [`Stepsize`](@ref) to use; - it has to be always less than 1. The default is the one proposed by Frank & Wolfe: ``s_k = \frac{2}{k+2}``. -* `sub_cost`: ([`FrankWolfeCost`](@ref)`(p, initiel_vector)`) the cost of the Frank-Wolfe sub problem - which by default uses the current iterate and (sub)gradient of the current iteration to define a default cost, - this is used to define the default `sub_objective`. It is ignored, if you set that or the `sub_problem` directly -* `sub_grad`: ([`FrankWolfeGradient`](@ref)`(p, initial_vector)`) the gradient of the Frank-Wolfe sub problem - which by default uses the current iterate and (sub)gradient of the current iteration to define a default gradient - this is used to define the default `sub_objective`. 
It is ignored, if you set that or the `sub_problem` directly -* `sub_objective`: ([`ManifoldGradientObjective`](@ref)`(sub_cost, sub_gradient)`) the objective for the Frank-Wolfe sub problem - this is used to define the default `sub_problem`. It is ignored, if you set the `sub_problem` manually -* `sub_problem`: ([`DefaultManoptProblem`](@ref)`(M, sub_objective)`) the Frank-Wolfe sub problem to solve. +* $_kw_evaluation_default: + $_kw_evaluation $_kw_evaluation_example + +* $_kw_retraction_method_default: + $_kw_retraction_method + +* `stepsize=`[`DecreasingStepsize`](@ref)`(; length=2.0, shift=2)`: + $_kw_stepsize, where the default is the step size $_doc_FW_sk_default + +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(500)`$_sc_any[`StopWhenGradientNormLess`](@ref)`(1.0e-6)`) + $_kw_stopping_criterion + +* $_kw_X_default: + $_kw_X, the evaluated gradient ``$_l_grad f`` evaluated at ``p^{(k)}``. + +* `sub_cost=`[`FrankWolfeCost`](@ref)`(p, X)`: + the cost of the Frank-Wolfe sub problem. $(_kw_used_in("sub_objective")) + +* `sub_grad=`[`FrankWolfeGradient`](@ref)`(p, X)`: + the gradient of the Frank-Wolfe sub problem. $(_kw_used_in("sub_objective")) + +* $_kw_sub_kwargs_default: $_kw_sub_kwargs + +* `sub_objective=`[`ManifoldGradientObjective`](@ref)`(sub_cost, sub_gradient)`: + the objective for the Frank-Wolfe sub problem. $(_kw_used_in("sub_problem")) + +* `sub_problem=`[`DefaultManoptProblem`](@ref)`(M, sub_objective)`): the sub problem to solve. This can be given in three forms - 1. as an [`AbstractManoptProblem`](@ref), then the `sub_state` specifies the solver to use + 1. as an [`AbstractManoptProblem`](@ref), then the `sub_state=` specifies the solver to use 2. as a closed form solution, as a function evaluating with new allocations `(M, p, X) -> q` that solves the sub problem on `M` given the current iterate `p` and (sub)gradient `X`. 3. as a closed form solution, as a function `(M, q, p, X) -> q` working in place of `q`. 
For points 2 and 3 the `sub_state` has to be set to the corresponding [`AbstractEvaluationType`](@ref), [`AllocatingEvaluation`](@ref) and [`InplaceEvaluation`](@ref), respectively -* `sub_state`: (`evaluation` if `sub_problem` is a function, a decorated [`GradientDescentState`](@ref) otherwise) - for a function, the evaluation is inherited from the Frank-Wolfe `evaluation` keyword. -* `sub_kwargs`: (`(;)`) keyword arguments to decorate the `sub_state` default state in case the `sub_problem` is not a function + This keyword takes further into account `sub_kwargs` to eventually decorate the problem + +* `sub_state= if sub_problem isa Function evaluation else GradientDescentState(M, copy(M,p); kwargs...)`: + + specify either the solver for a `sub_problem` or the kind of evaluation if the sub problem is given by a closed form solution + this keyword takes into account the `sub_stopping_criterion`, and the `sub_kwargs`, that are also used to potentially decorate the state. -All other keyword arguments are passed to [`decorate_state!`](@ref) for decorators or -[`decorate_objective!`](@ref), respectively. -If you provide the [`ManifoldGradientObjective`](@ref) directly, these decorations can still be specified +* `sub_stopping_criterion=`[`StopAfterIteration`](@ref)`(300)`$_sc_any[`StopWhenStepsizeLess`](@ref)`(1e-8)`: + $_kw_stopping_criterion for the sub solver. $(_kw_used_in("sub_state")) + +$_kw_others + +If you provide the [`ManifoldGradientObjective`](@ref) directly, the `evaluation=` keyword is ignored. +The decorations are still applied to the objective. # Output the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details """ + +@doc "$_doc_Frank_Wolfe_method" Frank_Wolfe_method(M::AbstractManifold, args...; kwargs...) function Frank_Wolfe_method( M::AbstractManifold, @@ -221,14 +271,7 @@ function Frank_Wolfe_method( return Frank_Wolfe_method!(M, mgo, q; kwargs...) end -@doc raw""" - Frank_Wolfe_method!(M, f, grad_f, p; kwargs...) 
- Frank_Wolfe_method!(M, gradient_objective, p; kwargs...) - -Perform the Frank Wolfe method in place of `p`. - -For all options and keyword arguments, see [`Frank_Wolfe_method`](@ref). -""" +@doc "$_doc_Frank_Wolfe_method" Frank_Wolfe_method!(M::AbstractManifold, args...; kwargs...) function Frank_Wolfe_method!( M::AbstractManifold, @@ -245,7 +288,8 @@ function Frank_Wolfe_method!( M::AbstractManifold, mgo::O, p; - initial_vector=zero_vector(M, p), + initial_vector=zero_vector(M, p), #deprecated + X=initial_vector, evaluation=AllocatingEvaluation(), objective_type=:Riemannian, retraction_method=default_retraction_method(M, typeof(p)), @@ -253,8 +297,8 @@ function Frank_Wolfe_method!( stopping_criterion::TStop=StopAfterIteration(200) | StopWhenGradientNormLess(1.0e-8) | StopWhenChangeLess(1.0e-8), - sub_cost=FrankWolfeCost(p, initial_vector), - sub_grad=FrankWolfeGradient(p, initial_vector), + sub_cost=FrankWolfeCost(p, X), + sub_grad=FrankWolfeGradient(p, X), sub_kwargs=(;), sub_objective=ManifoldGradientObjective(sub_cost, sub_grad), sub_problem=DefaultManoptProblem( @@ -307,14 +351,14 @@ function initialize_solver!(amp::AbstractManoptProblem, fws::FrankWolfeState) get_gradient!(amp, fws.X, fws.p) return fws end -function step_solver!(amp::AbstractManoptProblem, fws::FrankWolfeState, i) +function step_solver!(amp::AbstractManoptProblem, fws::FrankWolfeState, k) M = get_manifold(amp) # update gradient get_gradient!(amp, fws.X, fws.p) # evaluate grad F(p), store the result in O.X # solve sub task solve!(fws.sub_problem, fws.sub_state) # call the subsolver q = get_solver_result(fws.sub_state) - s = fws.stepsize(amp, fws, i) + s = fws.stepsize(amp, fws, k) # step along the geodesic retract!( M, @@ -331,13 +375,13 @@ end function step_solver!( amp::AbstractManoptProblem, fws::FrankWolfeState{P,T,F,ClosedFormSubSolverState{InplaceEvaluation}}, - i, + k, ) where {P,T,F} M = get_manifold(amp) get_gradient!(amp, fws.X, fws.p) # evaluate grad F in place for O.X q = copy(M, 
fws.p) fws.sub_problem(M, q, fws.p, fws.X) # evaluate the closed form solution and store the result in q - s = fws.stepsize(amp, fws, i) + s = fws.stepsize(amp, fws, k) # step along the geodesic retract!( M, @@ -354,13 +398,13 @@ end function step_solver!( amp::AbstractManoptProblem, fws::FrankWolfeState{P,T,F,ClosedFormSubSolverState{AllocatingEvaluation}}, - i, + k, ) where {P,T,F} M = get_manifold(amp) get_gradient!(amp, fws.X, fws.p) # evaluate grad F in place for O.X q = fws.sub_problem(M, fws.p, fws.X) # evaluate the closed form solution and store the result in O.p # step along the geodesic - s = fws.stepsize(amp, fws, i) + s = fws.stepsize(amp, fws, k) # step along the geodesic retract!( M, diff --git a/src/solvers/Lanczos.jl b/src/solvers/Lanczos.jl index 121d65db35..7484d05fb7 100644 --- a/src/solvers/Lanczos.jl +++ b/src/solvers/Lanczos.jl @@ -2,7 +2,7 @@ # # Lanczos sub solver # -@doc raw""" +@doc """ LanczosState{P,T,SC,B,I,R,TM,V,Y} <: AbstractManoptSolverState Solve the adaptive regularized subproblem with a Lanczos iteration @@ -10,14 +10,29 @@ Solve the adaptive regularized subproblem with a Lanczos iteration # Fields * `stop`: the stopping criterion +* `stop_newton`: the stopping criterion for the inner Newton iteration * `σ`: the current regularization parameter * `X`: the Iterate * `Lanczos_vectors`: the obtained Lanczos vectors * `tridig_matrix`: the tridiagonal coefficient matrix T * `coefficients`: the coefficients ``y_1,...y_k`` that determine the solution -* `Hp`: a temporary vector containing the evaluation of the Hessian -* `Hp_residual`: a temporary vector containing the residual to the Hessian +* `Hp`: a temporary tangent vector containing the evaluation of the Hessian +* `Hp_residual`: a temporary tangent vector containing the residual to the Hessian * `S`: the current obtained / approximated solution + +# Constructor + + LanczosState(TpM::TangentSpace; kwargs...) 
+ +## Keyword arguments + +* $_kw_X_default: the iterate using the manifold of the tangent space. +* `maxIterLanczos=200`: shortcut to set the maximal number of iterations in the ` stopping_criterion=` +* `θ=0.5`: set the parameter in the [`StopWhenFirstOrderProgress`](@ref) within the default `stopping_criterion=`. +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(maxIterLanczos)`$_sc_any[`StopWhenFirstOrderProgress`](@ref)`(θ)`: + the stopping criterion for the Lanczos iteration. +* `stopping_criterion_newtown=`[`StopAfterIteration`](@ref)`(200)`: the stopping criterion for the inner Newton iteration. +* `σ=10.0`: specify the regularization parameter """ mutable struct LanczosState{T,R,SC,SCN,B,TM,C} <: AbstractManoptSolverState X::T @@ -108,12 +123,12 @@ function initialize_solver!(dmp::AbstractManoptProblem{<:TangentSpace}, ls::Lanc return ls end -function step_solver!(dmp::AbstractManoptProblem{<:TangentSpace}, ls::LanczosState, i) +function step_solver!(dmp::AbstractManoptProblem{<:TangentSpace}, ls::LanczosState, k) TpM = get_manifold(dmp) p = TpM.point M = base_manifold(TpM) arcmo = get_objective(dmp) - if i == 1 #the first is known directly + if k == 1 #the first is known directly nX = norm(M, p, ls.X) if length(ls.Lanczos_vectors) == 0 push!(ls.Lanczos_vectors, ls.X ./ nX) @@ -130,65 +145,65 @@ function step_solver!(dmp::AbstractManoptProblem{<:TangentSpace}, ls::LanczosSta else # `i > 1` β = norm(M, p, ls.Hp_residual) if β > 1e-12 # Obtained new orthogonal Lanczos long enough with respect to numerical stability - if length(ls.Lanczos_vectors) < i + if length(ls.Lanczos_vectors) < k push!(ls.Lanczos_vectors, ls.Hp_residual ./ β) else - copyto!(M, ls.Lanczos_vectors[i], p, ls.Hp_residual ./ β) + copyto!(M, ls.Lanczos_vectors[k], p, ls.Hp_residual ./ β) end else # Generate new random vector and # modified Gram Schmidt of new vector with respect to Q rand!(M, ls.Hp_residual; vector_at=p) - for k in 1:(i - 1) + for k in 1:(k - 1) ls.Hp_residual .=
ls.Hp_residual - inner(M, p, ls.Lanczos_vectors[k], ls.Hp_residual) * ls.Lanczos_vectors[k] end - if length(ls.Lanczos_vectors) < i + if length(ls.Lanczos_vectors) < k push!(ls.Lanczos_vectors, ls.Hp_residual ./ norm(M, p, ls.Hp_residual)) else copyto!( M, - ls.Lanczos_vectors[i], + ls.Lanczos_vectors[k], p, ls.Hp_residual ./ norm(M, p, ls.Hp_residual), ) end end # Update Hessian and residual - get_objective_hessian!(M, ls.Hp, arcmo, p, ls.Lanczos_vectors[i]) - ls.Hp_residual .= ls.Hp - β * ls.Lanczos_vectors[i - 1] - α = inner(M, p, ls.Hp_residual, ls.Lanczos_vectors[i]) - ls.Hp_residual .= ls.Hp_residual - α * ls.Lanczos_vectors[i] + get_objective_hessian!(M, ls.Hp, arcmo, p, ls.Lanczos_vectors[k]) + ls.Hp_residual .= ls.Hp - β * ls.Lanczos_vectors[k - 1] + α = inner(M, p, ls.Hp_residual, ls.Lanczos_vectors[k]) + ls.Hp_residual .= ls.Hp_residual - α * ls.Lanczos_vectors[k] # Update tridiagonal matrix - ls.tridig_matrix[i, i] = α - ls.tridig_matrix[i - 1, i] = β - ls.tridig_matrix[i, i - 1] = β - min_cubic_Newton!(dmp, ls, i) + ls.tridig_matrix[k, k] = α + ls.tridig_matrix[k - 1, k] = β + ls.tridig_matrix[k, k - 1] = β + min_cubic_Newton!(dmp, ls, k) end - copyto!(M, ls.S, p, sum(ls.Lanczos_vectors[k] * ls.coefficients[k] for k in 1:i)) + copyto!(M, ls.S, p, sum(ls.Lanczos_vectors[k] * ls.coefficients[k] for k in 1:k)) return ls end # # Solve Lanczos sub problem # -function min_cubic_Newton!(mp::AbstractManoptProblem{<:TangentSpace}, ls::LanczosState, i) +function min_cubic_Newton!(mp::AbstractManoptProblem{<:TangentSpace}, ls::LanczosState, k) TpM = get_manifold(mp) p = TpM.point M = base_manifold(TpM) tol = 1e-16 - gvec = zeros(i) + gvec = zeros(k) gvec[1] = norm(M, p, ls.X) - λ = opnorm(Array(@view ls.tridig_matrix[1:i, 1:i])) + 2 - T_λ = @view(ls.tridig_matrix[1:i, 1:i]) + λ * I + λ = opnorm(Array(@view ls.tridig_matrix[1:k, 1:k])) + 2 + T_λ = @view(ls.tridig_matrix[1:k, 1:k]) + λ * I - λ_min = eigmin(Array(@view ls.tridig_matrix[1:i, 1:i])) + λ_min = 
eigmin(Array(@view ls.tridig_matrix[1:k, 1:k])) lower_barrier = max(0, -λ_min) - k = 0 - y = zeros(i) - while !ls.stop_newton(mp, ls, k) - k += 1 + j = 0 + y = zeros(k) + while !ls.stop_newton(mp, ls, j) + j += 1 y = -(T_λ \ gvec) ynorm = norm(y, 2) ϕ = 1 / ynorm - ls.σ / λ # when ϕ is "zero" then y is the solution. @@ -215,37 +230,42 @@ function min_cubic_Newton!(mp::AbstractManoptProblem{<:TangentSpace}, ls::Lanczo T_λ = T_λ + Δλ * I λ = λ + Δλ end - ls.coefficients[1:i] .= y + ls.coefficients[1:k] .= y return ls.coefficients end # # Stopping Criteria # -@doc raw""" +_math_sc_firstorder = raw""" +```math +m(X_k) \leq m(0) +\quad\text{ and }\quad +\lVert \operatorname{grad} m(X_k) \rVert ≤ θ \lVert X_k \rVert^2 +``` +""" + +@doc """ StopWhenFirstOrderProgress <: StoppingCriterion A stopping criterion related to the Riemannian adaptive regularization with cubics (ARC) solver indicating that the model function at the current (outer) iterate, -```math - m(X) = f(p) + - + \frac{1}{2} + \frac{σ}{3} \lVert X \rVert^3, -``` +$_doc_ARC_mdoel -defined on the tangent space ``T_{p}\mathcal M`` fulfills at the current iterate ``X_k`` that +defined on the tangent space ``$(_l_TpM())`` fulfills at the current iterate ``X_k`` that -```math -m(X_k) \leq m(0) -\quad\text{ and }\quad -\lVert \operatorname{grad} m(X_k) \rVert ≤ θ \lVert X_k \rVert^2 -``` +$_math_sc_firstorder # Fields * `θ`: the factor ``θ`` in the second condition -* `at_iteration`: indicates at which iteration (including `i=0`) the stopping criterion - was fulfilled and is `-1` while it is not fulfilled. 
+* $_field_at_iteration + +# Constructor + + StopWhenFirstOrderProgress(θ) + """ mutable struct StopWhenFirstOrderProgress{F} <: StoppingCriterion θ::F @@ -253,9 +273,9 @@ mutable struct StopWhenFirstOrderProgress{F} <: StoppingCriterion StopWhenFirstOrderProgress(θ::F) where {F} = new{F}(θ, -1) end function (c::StopWhenFirstOrderProgress)( - dmp::AbstractManoptProblem{<:TangentSpace}, ls::LanczosState, i::Int + dmp::AbstractManoptProblem{<:TangentSpace}, ls::LanczosState, k::Int ) - if (i == 0) + if (k == 0) if norm(ls.X) == zero(eltype(ls.X)) c.at_iteration = 0 return true @@ -268,12 +288,12 @@ function (c::StopWhenFirstOrderProgress)( p = TpM.point M = base_manifold(TpM) nX = norm(M, p, get_gradient(dmp, p)) - y = @view(ls.coefficients[1:(i - 1)]) - Ty = @view(ls.tridig_matrix[1:i, 1:(i - 1)]) * y + y = @view(ls.coefficients[1:(k - 1)]) + Ty = @view(ls.tridig_matrix[1:k, 1:(k - 1)]) * y ny = norm(y) - model_grad_norm = norm(nX .* [1, zeros(i - 1)...] + Ty + ls.σ * ny * [y..., 0]) + model_grad_norm = norm(nX .* [1, zeros(k - 1)...]
+ Ty + ls.σ * ny * [y..., 0]) prog = (model_grad_norm <= c.θ * ny^2) - (prog) && (c.at_iteration = i) + (prog) && (c.at_iteration = k) return prog end function get_reason(c::StopWhenFirstOrderProgress) @@ -286,9 +306,9 @@ function get_reason(c::StopWhenFirstOrderProgress) return "" end function (c::StopWhenFirstOrderProgress)( - dmp::AbstractManoptProblem{<:TangentSpace}, ams::AbstractManoptSolverState, i::Int + dmp::AbstractManoptProblem{<:TangentSpace}, ams::AbstractManoptSolverState, k::Int ) - if (i == 0) + if (k == 0) c.at_iteration = -1 return false end @@ -300,7 +320,7 @@ function (c::StopWhenFirstOrderProgress)( # norm of current iterate nX = norm(base_manifold(TpM), p, q) prog = (nG <= c.θ * nX^2) - prog && (c.at_iteration = i) + prog && (c.at_iteration = k) return prog end function status_summary(c::StopWhenFirstOrderProgress) diff --git a/src/solvers/LevenbergMarquardt.jl b/src/solvers/LevenbergMarquardt.jl index 298b650d42..542d365cba 100644 --- a/src/solvers/LevenbergMarquardt.jl +++ b/src/solvers/LevenbergMarquardt.jl @@ -1,23 +1,29 @@ -@doc raw""" +_doc_LM_formula = raw""" +```math +\operatorname*{arg\,min}_{p ∈ \mathcal M} \frac{1}{2} \lVert f(p) \rVert^2, +``` +""" +_doc_LM = """ LevenbergMarquardt(M, f, jacobian_f, p, num_components=-1) + LevenbergMarquardt!(M, f, jacobian_f, p, num_components=-1; kwargs...) Solve an optimization problem of the form -```math -\operatorname*{arg\,min}_{p ∈ \mathcal M} \frac{1}{2} \lVert f(p) \rVert^2, -``` +$(_doc_LM_formula) -where ``f: \mathcal M → ℝ^d`` is a continuously differentiable function, +where ``f: $(_l_M) → ℝ^d`` is a continuously differentiable function, using the Riemannian Levenberg-Marquardt algorithm [Peeters:1993](@cite). -The implementation follows Algorithm 1 [AdachiOkunoTakeda:2022](@cite) +The implementation follows Algorithm 1 [AdachiOkunoTakeda:2022](@cite). +The second signature performs the optimization in-place of `p`. 
# Input -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function ``f: \mathcal M→ℝ^d`` + +$(_arg_M) +* `f`: a cost function ``f: $(_l_M) → ℝ^d`` * `jacobian_f`: the Jacobian of ``f``. The Jacobian is supposed to accept a keyword argument `basis_domain` which specifies basis of the tangent space at a given point in which the Jacobian is to be calculated. By default it should be the `DefaultOrthonormalBasis`. -* `p`: an initial value ``p ∈ \mathcal M`` +$(_arg_p) * `num_components`: length of the vector returned by the cost function (`d`). By default its value is -1 which means that it is determined automatically by calling `f` one additional time. This is only possible when `evaluation` is `AllocatingEvaluation`, @@ -26,33 +32,29 @@ The implementation follows Algorithm 1 [AdachiOkunoTakeda:2022](@cite) These can also be passed as a [`NonlinearLeastSquaresObjective`](@ref), then the keyword `jacobian_tangent_basis` below is ignored -# Optional +# Keyword arguments -* `evaluation`: ([`AllocatingEvaluation`](@ref)) specify whether the gradient works by allocation (default) form `gradF(M, x)` - or [`InplaceEvaluation`](@ref) in place of the form `gradF!(M, X, x)`. -* `retraction_method`: (`default_retraction_method(M, typeof(p))`) a `retraction(M,x,ξ)` to use. -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(200) | `[`StopWhenGradientNormLess`](@ref)`(1e-12)`) - a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop. -* `expect_zero_residual`: (`false`) whether or not the algorithm might expect that the value of +* $(_kw_evaluation_default): $(_kw_evaluation) +* `η=0.2`: scaling factor for the sufficient cost decrease threshold required to accept new proposal points. Allowed range: `0 < η < 1`. +* `expect_zero_residual=false`: whether or not the algorithm might expect that the value of residual (objective) at minimum is equal to 0. -* `η`: scaling factor for the sufficient cost decrease threshold required to accept new proposal points. 
Allowed range: `0 < η < 1`. -* `damping_term_min`: initial (and also minimal) value of the damping term -* `β`: parameter by which the damping term is multiplied when the current new point is rejected -* `initial_residual_values`: the initial residual vector of the cost function `f`. + $(_kw_stopping_criterion) +* `damping_term_min=0.1`: initial (and also minimal) value of the damping term +* `β=5.0`: parameter by which the damping term is multiplied when the current new point is rejected * `initial_jacobian_f`: the initial Jacobian of the cost function `f`. + By default this is a matrix of size `num_components` times the manifold dimension of similar type as `p`. +* `initial_residual_values`: the initial residual vector of the cost function `f`. + By default this is a vector of length `num_components` of similar type as `p`. * `jacobian_tangent_basis`: an [`AbstractBasis`](@extref `ManifoldsBase.AbstractBasis`) specify the basis of the tangent space for `jacobian_f`. +* $(_kw_retraction_method_default): $(_kw_retraction_method) +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(200)`$(_sc_any)[`StopWhenGradientNormLess`](@ref)`(1e-12)`: -All other keyword arguments are passed to [`decorate_state!`](@ref) for decorators or -[`decorate_objective!`](@ref), respectively. -If you provide the [`ManifoldGradientObjective`](@ref) directly, these decorations can still be specified - - -# Output - -the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details +$(_kw_others) -# References +$(_doc_sec_output) """ + +@doc "$(_doc_LM)" LevenbergMarquardt(M::AbstractManifold, args...; kwargs...) function LevenbergMarquardt( M::AbstractManifold, @@ -110,11 +112,7 @@ function LevenbergMarquardt( return LevenbergMarquardt!(M, nlso, q; kwargs...) end -@doc raw""" - LevenbergMarquardt!(M, f, jacobian_f, p, num_components=-1; kwargs...) - -For more options see [`LevenbergMarquardt`](@ref). 
-""" +@doc "$(_doc_LM)" LevenbergMarquardt!(M::AbstractManifold, args...; kwargs...) function LevenbergMarquardt!( M::AbstractManifold, @@ -258,7 +256,7 @@ end function step_solver!( dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective}, lms::LevenbergMarquardtState, - i::Integer, + k::Integer, ) where {mT<:AbstractManifold} # `o.residual_values` is either initialized by `initialize_solver!` or taken from the previous iteration M = get_manifold(dmp) diff --git a/src/solvers/NelderMead.jl b/src/solvers/NelderMead.jl index 519c286125..42f734c343 100644 --- a/src/solvers/NelderMead.jl +++ b/src/solvers/NelderMead.jl @@ -8,7 +8,8 @@ A simplex for the Nelder-Mead algorithm. NelderMeadSimplex(M::AbstractManifold) -Construct a simplex using ``n+1`` random points from manifold `M`, where ``n`` is the manifold dimension of `M`. +Construct a simplex using ``d+1`` random points from manifold `M`, +where ``d`` is the $(_link_manifold_dimension("")) of `M`. NelderMeadSimplex( M::AbstractManifold, @@ -49,7 +50,7 @@ function NelderMeadSimplex( return NelderMeadSimplex(pts) end -@doc raw""" +@doc """ NelderMeadState <: AbstractManoptSolverState Describes all parameters and the state of a Nelder-Mead heuristic based @@ -61,25 +62,35 @@ The naming of these parameters follows the [Wikipedia article](https://en.wikipe of the Euclidean case. The default is given in brackets, the required value range after the description -* `population` an `Array{`point`,1}` of ``n+1`` points ``x_i``, ``i=1,…,n+1``, where ``n`` is the - dimension of the manifold. 
-* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(2000) | `[`StopWhenPopulationConcentrated`](@ref)`()`) a [`StoppingCriterion`](@ref) -* `α`: (`1.`) reflection parameter (``α > 0``) -* `γ`: (`2.`) expansion parameter (``γ > 0``) -* `ρ`: (`1/2`) contraction parameter, ``0 < ρ ≤ \frac{1}{2}``, -* `σ`: (`1/2`) shrink coefficient, ``0 < σ ≤ 1`` -* `p`: (`copy(population.pts[1])`) - a field to collect the current best value (initialized to _some_ point here) -* `retraction_method`: (`default_retraction_method(M, typeof(p))`) the retraction to use. -* `inverse_retraction_method`: (`default_inverse_retraction_method(M, typeof(p))`) an inverse retraction to use. +* `population::`[`NelderMeadSimplex`](@ref): a population (set) of ``d+1`` points ``x_i``, ``i=1,…,n+1``, where ``d`` + is the $(_link_manifold_dimension("")) of `M`. +* $_field_step +* `α`: the reflection parameter ``α > 0``: +* `γ` the expansion parameter ``γ > 0``: +* `ρ`: the contraction parameter, ``0 < ρ ≤ \\frac{1}{2}``, +* `σ`: the shrinkage coefficient, ``0 < σ ≤ 1`` +* `p`: a field to store the current best value (initialized to _some_ point here) +* $_field_retr +* $_field_inv_retr # Constructors - NelderMeadState(M[, population::NelderMeadSimplex]; kwargs...) + NelderMeadState(M, population::NelderMeadSimplex=NelderMeadSimplex(M)); kwargs...) Construct a Nelder-Mead Option with a default population (if not provided) of set of `dimension(M)+1` random points stored in [`NelderMeadSimplex`](@ref). -In the constructor all fields (besides the population) are keyword arguments. 
+# Keyword arguments + +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(2000)`$_sc_any[`StopWhenPopulationConcentrated`](@ref)`()`): + a [`StoppingCriterion`](@ref) +* `α=1.0`: reflection parameter ``α > 0``: +* `γ=2.0` expansion parameter ``γ``: +* `ρ=1/2`: contraction parameter, ``0 < ρ ≤ \\frac{1}{2}``, +* `σ=1/2`: shrink coefficient, ``0 < σ ≤ 1`` +* $_kw_retraction_method_default: $_kw_retraction_method +* $_kw_inverse_retraction_method_default: $_kw_inverse_retraction_method +* `p=copy(M, population.pts[1])`: initialise the storage for the best point (iterate) """ mutable struct NelderMeadState{ T, @@ -168,48 +179,63 @@ function set_iterate!(O::NelderMeadState, ::AbstractManifold, p) return O end -@doc raw""" - NelderMead(M::AbstractManifold, f) - NelderMead(M::AbstractManifold, f, population::NelderMeadSimplex) - NelderMead(M::AbstractManifold, mco::AbstractManifoldCostObjective) - NelderMead(M::AbstractManifold, mco::AbstractManifoldCostObjective, population::NelderMeadSimplex) - -Solve a Nelder-Mead minimization problem for the cost function ``f: \mathcal M`` on the -manifold `M`. If the initial population `p` is not given, a random set of -points is chosen. - -This algorithm is adapted from the Euclidean Nelder-Mead method, see +_doc_NelderMead = """ + NelderMead(M::AbstractManifold, f, population=NelderMeadSimplex(M)) + NelderMead(M::AbstractManifold, mco::AbstractManifoldCostObjective, population=NelderMeadSimplex(M)) + NelderMead!(M::AbstractManifold, f, population) + NelderMead!(M::AbstractManifold, mco::AbstractManifoldCostObjective, population) + +Solve a Nelder-Mead minimization problem for the cost function ``f: $_l_M`` on the +manifold `M`. If the initial [`NelderMeadSimplex`](@ref) is not provided, a random set of +points is chosen. The computation can be performed in-place of the `population`.
+
+The algorithm consists of the following steps. Let ``d`` denote the dimension of the manifold ``$_l_M``.
+
+1. Order the simplex vertices ``p_i, i=1,…,d+1`` by increasing cost, such that we have ``f(p_1) ≤ f(p_2) ≤ … ≤ f(p_{d+1})``.
+2. Compute the Riemannian center of mass [Karcher:1977](@cite), cf. [`mean`](@extref Statistics.mean-Tuple{AbstractManifold, Vararg{Any}}), ``p_{$(_l_txt("m"))}``
+   of the simplex vertices ``p_1,…,p_{d+1}``.
+3. Reflect the point with the worst point at the mean ``p_{$(_l_txt("r"))} = $(_l_retr)_{p_{$(_l_txt("m"))}}\\bigl( - α$(_l_retr)^{-1}_{p_{$(_l_txt("m"))}} (p_{d+1}) \\bigr)``
+   If ``f(p_1) ≤ f(p_{$(_l_txt("r"))}) ≤ f(p_{d})`` then set ``p_{d+1} = p_{$(_l_txt("r"))}`` and go to step 1.
+4. Expand the simplex if ``f(p_{$(_l_txt("r"))}) < f(p_1)`` by computing the expansion point ``p_{$(_l_txt("e"))} = $(_l_retr)_{p_{$(_l_txt("m"))}}\\bigl( - γα$(_l_retr)^{-1}_{p_{$(_l_txt("m"))}} (p_{d+1}) \\bigr)``,
+   which in this formulation allows to reuse the tangent vector from the inverse retraction from before.
+   If ``f(p_{$(_l_txt("e"))}) < f(p_{$(_l_txt("r"))})`` then set ``p_{d+1} = p_{$(_l_txt("e"))}`` otherwise set ``p_{d+1} = p_{$(_l_txt("r"))}``. Then go to Step 1.
+5. Contract the simplex if ``f(p_{$(_l_txt("r"))}) ≥ f(p_d)``.
+   1. If ``f(p_{$(_l_txt("r"))}) < f(p_{d+1})`` set the step ``s = -ρ``
+   2. otherwise set ``s=ρ``.
+   Compute the contraction point ``p_{$(_l_txt("c"))} = $(_l_retr)_{p_{$(_l_txt("m"))}}\\bigl(s$(_l_retr)^{-1}_{p_{$(_l_txt("m"))}} p_{d+1} \\bigr)``.
+   1. in this case if ``f(p_{$(_l_txt("c"))}) < f(p_{$(_l_txt("r"))})`` set ``p_{d+1} = p_{$(_l_txt("c"))}`` and go to step 1
+   2. in this case if ``f(p_{$(_l_txt("c"))}) < f(p_{d+1})`` set ``p_{d+1} = p_{$(_l_txt("c"))}`` and go to step 1
+6. Shrink all points (closer to ``p_1``).
For all ``i=2,...,d+1`` set + ``p_{i} = $(_l_retr)_{p_{1}}\\bigl( σ$(_l_retr)^{-1}_{p_{1}} p_{i} \\bigr).`` + +For more details, see The Euclidean variant in the Wikipedia [https://en.wikipedia.org/wiki/Nelder-Mead_method](https://en.wikipedia.org/wiki/Nelder-Mead_method) -and -[http://www.optimization-online.org/DB_FILE/2007/08/1742.pdf](http://www.optimization-online.org/DB_FILE/2007/08/1742.pdf). +or Algorithm 4.1 in [http://www.optimization-online.org/DB_FILE/2007/08/1742.pdf](http://www.optimization-online.org/DB_FILE/2007/08/1742.pdf). # Input -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function to minimize -* `population`: (``n+1`` `rand(M)`s) an initial population of ``n+1`` points, where ``n`` - is the dimension of the manifold `M`. - -# Optional - -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(2000) | `[`StopWhenPopulationConcentrated`](@ref)`()`) a [`StoppingCriterion`](@ref) -* `α`: (`1.`) reflection parameter (``α > 0``) -* `γ`: (`2.`) expansion parameter (``γ``) -* `ρ`: (`1/2`) contraction parameter, ``0 < ρ ≤ \frac{1}{2}``, -* `σ`: (`1/2`) shrink coefficient, ``0 < σ ≤ 1`` -* `retraction_method`: (`default_retraction_method(M, typeof(p))`) the retraction to use -* `inverse_retraction_method`: (`default_inverse_retraction_method(M, typeof(p))`) an inverse retraction to use. +$_arg_M +$_arg_f +* `population::`[`NelderMeadSimplex`](@ref)`=`[`NelderMeadSimplex`](@ref)`(M)`: an initial simplex of ``d+1`` points, where ``d`` + is the $(_link_manifold_dimension("")) of `M`. -and the ones that are passed to [`decorate_state!`](@ref) for decorators. +# Keyword arguments -!!! note - The manifold `M` used here has to either provide a `mean(M, pts)` or you have to - load `Manifolds.jl` to use its statistics part. 
+* `stopping_criterion=`[`StopAfterIteration`](@ref)`(2000)`$_sc_any[`StopWhenPopulationConcentrated`](@ref)`()`:
+  a [`StoppingCriterion`](@ref)
+* `α=1.0`: reflection parameter ``α > 0``
+* `γ=2.0`: expansion parameter ``γ``
+* `ρ=1/2`: contraction parameter, ``0 < ρ ≤ \\frac{1}{2}``,
+* `σ=1/2`: shrink coefficient, ``0 < σ ≤ 1``
+* $_kw_retraction_method_default: $_kw_retraction_method
+* $_kw_inverse_retraction_method_default: $_kw_inverse_retraction_method

-# Output
+$_kw_others

-the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details
+$_doc_sec_output
 """
+
+@doc "$(_doc_NelderMead)"
+NelderMead(M::AbstractManifold, args...; kwargs...)
 
 function NelderMead(M::AbstractManifold, f; kwargs...)
     return NelderMead(M, f, NelderMeadSimplex(M); kwargs...)
@@ -232,15 +258,7 @@ function NelderMead(
     rs = NelderMead(M, f_, population_; kwargs...)
     return (P == eltype(rs)) ? rs[] : rs
 end
-@doc raw"""
-    NelderMead(M::AbstractManifold, f [, population::NelderMeadSimplex])
-
-Solve a Nelder Mead minimization problem for the cost function `f` on the
-manifold `M`. If the initial population `population` is not given, a random set of
-points is chosen. If it is given, the computation is done in place of `population`.
-
-For more options see [`NelderMead`](@ref).
-"""
+@doc "$(_doc_NelderMead)"
+NelderMead!(M::AbstractManifold, args...; kwargs...)
 
 function NelderMead!(M::AbstractManifold, f, population::NelderMeadSimplex; kwargs...)
     mco = ManifoldCostObjective(f)
@@ -303,7 +321,7 @@ function step_solver!(mp::AbstractManoptProblem, s::NelderMeadState, ::Any)
     Costr = get_cost(mp, xr)
     continue_steps = true
     # is it better than the worst but not better than the best?
- if Costr >= s.costs[1] && Costr < s.costs[end] + if Costr >= s.costs[1] && Costr < s.costs[end - 1] # store as last s.population.pts[end] = xr s.costs[end] = Costr @@ -378,9 +396,9 @@ mutable struct StopWhenPopulationConcentrated{F<:Real} <: StoppingCriterion end end function (c::StopWhenPopulationConcentrated)( - mp::AbstractManoptProblem, s::NelderMeadState, i::Int + mp::AbstractManoptProblem, s::NelderMeadState, k::Int ) - if i == 0 # reset on init + if k == 0 # reset on init c.at_iteration = -1 end M = get_manifold(mp) @@ -390,7 +408,7 @@ function (c::StopWhenPopulationConcentrated)( s.population.pts[2:end], ) if c.value_f < c.tol_f && c.value_p < c.tol_p - c.at_iteration = i + c.at_iteration = k return true end return false diff --git a/src/solvers/adaptive_regularization_with_cubics.jl b/src/solvers/adaptive_regularization_with_cubics.jl index 850aa9057f..d7e74ba230 100644 --- a/src/solvers/adaptive_regularization_with_cubics.jl +++ b/src/solvers/adaptive_regularization_with_cubics.jl @@ -1,42 +1,53 @@ -@doc raw""" +@doc """ AdaptiveRegularizationState{P,T} <: AbstractHessianSolverState A state for the [`adaptive_regularization_with_cubics`](@ref) solver. # Fields -a default value is given in brackets if a parameter can be left out in initialization. - -* `η1`, `η2`: (`0.1`, `0.9`) bounds for evaluating the regularization parameter -* `γ1`, `γ2`: (`0.1`, `2.0`) shrinking and expansion factors for regularization parameter `σ` -* `p`: (`rand(M)` the current iterate -* `X`: (`zero_vector(M,p)`) the current gradient ``\operatorname{grad}f(p)`` -* `s`: (`zero_vector(M,p)`) the tangent vector step resulting from minimizing the model - problem in the tangent space ``\mathcal T_{p} \mathcal M`` -* `σ`: the current cubic regularization parameter -* `σmin`: (`1e-7`) lower bound for the cubic regularization parameter -* `ρ_regularization`: (`1e3`) regularization parameter for computing ρ. 
-  When approaching convergence ρ may be difficult to compute with numerator and denominator approaching zero.
-  Regularizing the ratio lets ρ go to 1 near convergence.
-* `evaluation`: (`AllocatingEvaluation()`) if you provide a
-* `retraction_method`: (`default_retraction_method(M)`) the retraction to use
-* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(100)`) a [`StoppingCriterion`](@ref)
-* `sub_problem`: sub problem solved in each iteration
-* `sub_state`: an [`AbstractManoptSolverState`](@ref) for the subsolver
-Furthermore the following integral fields are defined
-
-* `q`: (`copy(M,p)`) a point for the candidates to evaluate model and ρ
-* `H`: (`copy(M, p, X)`) the current Hessian, ``\operatorname{Hess}F(p)[⋅]``
-* `S`: (`copy(M, p, X)`) the current solution from the subsolver
-* `ρ`: the current regularized ratio of actual improvement and model improvement.
-* `ρ_denominator`: (`one(ρ)`) a value to store the denominator from the computation of ρ
+* `η1`, `η2`: bounds for evaluating the regularization parameter
+* `γ1`, `γ2`: shrinking and expansion factors for regularization parameter `σ`
+* `H`: the current Hessian evaluation
+* `S`: the current solution from the subsolver
+* $_field_iterate
+* `q`: a point for the candidates to evaluate model and ρ
+* $_field_gradient
+* `s`: the tangent vector step resulting from minimizing the model
+  problem in the tangent space ``$(_l_TpM())``
+* `σ`: the current cubic regularization parameter
+* `σmin`: lower bound for the cubic regularization parameter
+* `ρ_regularization`: regularization parameter for computing ρ.
+  When approaching convergence ρ may be difficult to compute with numerator and denominator approaching zero.
+  Regularizing the ratio lets ρ go to 1 near convergence.
+* `ρ`: the current regularized ratio of actual improvement and model improvement.
+* `ρ_denominator`: a value to store the denominator from the computation of ρ
   to allow for a warning or error when this value is non-positive.
+* $_field_retr +* $_field_stop +* $_arg_sub_problem +* $_arg_sub_state + +Furthermore the following integral fields are defined # Constructor AdaptiveRegularizationState(M, p=rand(M); X=zero_vector(M, p); kwargs...) -Construct the solver state with all fields stated as keyword arguments. +Construct the solver state with all fields stated as keyword arguments and the following defaults + +* `η1=0.1` +* `η2=0.9` +* `γ1=0.1` +* `γ2=2.0` +* `σ=100/manifold_dimension(M)` +* `σmin=1e-7 +* `ρ_regularization=1e3` +* $_kw_evaluation_default +* $_kw_retraction_method_default +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(100)` +* `sub_objective=nothing` a shortcut to provide a subobjective. +* `sub_problem=nothing` is set to [`DefaultManoptProblem`](@ref) on the [`TangentSpace`](@extref ManifoldsBase `ManifoldsBase.TangentSpace`) of `p` if an `sub_objecive` is provided +* `sub_state` is set to [`AllocatingEvaluation`](@ref) if `sub_problem` is a function and to a [`LanczosState`](@ref) on the tangent space otherwise """ mutable struct AdaptiveRegularizationState{ P, @@ -85,7 +96,7 @@ function AdaptiveRegularizationState( σ::R=100.0 / sqrt(manifold_dimension(M)),# Had this to initial value of 0.01. However try same as in MATLAB: 100/sqrt(dim(M)) ρ_regularization::R=1e3, stopping_criterion::SC=StopAfterIteration(100), - retraction_method::RTM=default_retraction_method(M), + retraction_method::RTM=default_retraction_method(M, typeof(p)), σmin::R=1e-10, η1::R=0.1, η2::R=0.9, @@ -160,53 +171,64 @@ function show(io::IO, arcs::AdaptiveRegularizationState) return print(io, s) end -@doc raw""" +_doc_ARC_mdoel = raw""" +```math +m_k(X) = f(p_k) + ⟨X, \operatorname{grad} f(p^{(k)})⟩ + \frac{1}{2}⟨X, \operatorname{Hess} f(p^{(k)})[X]⟩ + \frac{σ_k}{3}\lVert X \rVert^3 +``` +""" + +_doc_ARC_improvement = raw""" +```math + ρ_k = \frac{f(p_k) - f(\operatorname{retr}_{p_k}(X_k))}{m_k(0) - m_k(X_k) + \frac{σ_k}{3}\lVert X_k\rVert^3}. 
+``` +""" +_doc_ARC_regularization_update = raw""" +```math +σ_{k+1} = +\begin{cases} + \max\{σ_{\min}, γ_1σ_k\} & \text{ if } ρ \geq η_2 &\text{ (the model was very successful)},\\ + σ_k & \text{ if } ρ ∈ [η_1, η_2)&\text{ (the model was successful)},\\ + γ_2σ_k & \text{ if } ρ < η_1&\text{ (the model was unsuccessful)}. +\end{cases} +``` +""" + +_doc_ARC = """ adaptive_regularization_with_cubics(M, f, grad_f, Hess_f, p=rand(M); kwargs...) adaptive_regularization_with_cubics(M, f, grad_f, p=rand(M); kwargs...) adaptive_regularization_with_cubics(M, mho, p=rand(M); kwargs...) + adaptive_regularization_with_cubics!(M, f, grad_f, Hess_f, p; kwargs...) + adaptive_regularization_with_cubics!(M, f, grad_f, p; kwargs...) + adaptive_regularization_with_cubics!(M, mho, p; kwargs...) Solve an optimization problem on the manifold `M` by iteratively minimizing -```math - m_k(X) = f(p_k) + ⟨X, \operatorname{grad} f(p_k)⟩ + \frac{1}{2}⟨X, \operatorname{Hess} f(p_k)[X]⟩ + \frac{σ_k}{3}\lVert X \rVert^3 -``` +$_doc_ARC_mdoel -on the tangent space at the current iterate ``p_k``, where ``X ∈ T_{p_k}\mathcal M`` and +on the tangent space at the current iterate ``p_k``, where ``X ∈ $(_l_TpM("p_k"))`` and ``σ_k > 0`` is a regularization parameter. -Let ``X_k`` denote the minimizer of the model ``m_k`` and use the model improvement +Let ``Xp^{(k)}`` denote the minimizer of the model ``m_k`` and use the model improvement -```math - ρ_k = \frac{f(p_k) - f(\operatorname{retr}_{p_k}(X_k))}{m_k(0) - m_k(X_k) + \frac{σ_k}{3}\lVert X_k\rVert^3}. -``` +$_doc_ARC_improvement With two thresholds ``η_2 ≥ η_1 > 0`` -set ``p_{k+1} = \operatorname{retr}_{p_k}(X_k)`` if ``ρ ≥ η_1`` +set ``p_{k+1} = $(_l_retr)_{p_k}(X_k)`` if ``ρ ≥ η_1`` and reject the candidate otherwise, that is, set ``p_{k+1} = p_k``. 
-Further update the regularization parameter using factors ``0 < γ_1 < 1 < γ_2`` +Further update the regularization parameter using factors ``0 < γ_1 < 1 < γ_2`` reads -```math -σ_{k+1} = -\begin{cases} - \max\{σ_{\min}, γ_1σ_k\} & \text{ if } ρ \geq η_2 &\text{ (the model was very successful)},\\ - σ_k & \text{ if } ρ ∈ [η_1, η_2)&\text{ (the model was successful)},\\ - γ_2σ_k & \text{ if } ρ < η_1&\text{ (the model was unsuccessful)}. -\end{cases} -``` +$_doc_ARC_regularization_update For more details see [AgarwalBoumalBullinsCartis:2020](@cite). # Input -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function ``F: \mathcal M → ℝ`` to minimize -* `grad_f`: the gradient ``\operatorname{grad}F: \mathcal M → T \mathcal M`` of ``F`` -* `Hess_f`: (optional) the Hessian ``H( \mathcal M, x, ξ)`` of ``F`` -* `p`: an initial value ``p ∈ \mathcal M`` - -For the case that no Hessian is provided, the Hessian is computed using finite difference, see -[`ApproxHessianFiniteDifference`](@ref). +$_arg_M +$_arg_f +$_arg_grad_f +$_arg_Hess_f +$_arg_p the cost `f` and its gradient and Hessian might also be provided as a [`ManifoldHessianObjective`](@ref) @@ -214,31 +236,40 @@ the cost `f` and its gradient and Hessian might also be provided as a [`Manifold the default values are given in brackets -* `σ`: (`100.0 / sqrt(manifold_dimension(M)`) initial regularization parameter -* `σmin`: (`1e-10`) minimal regularization value ``σ_{\min}`` -* `η1`: (`0.1`) lower model success threshold -* `η2`: (`0.9`) upper model success threshold -* `γ1`: (`0.1`) regularization reduction factor (for the success case) -* `γ2`: (`2.0`) regularization increment factor (for the non-success case) -* `evaluation`: ([`AllocatingEvaluation`](@ref)) specify whether the gradient works by allocation (default) form `grad_f(M, p)` - or [`InplaceEvaluation`](@ref) in place, that is of the form `grad_f!(M, X, p)` and analogously for the Hessian. 
-* `retraction_method`: (`default_retraction_method(M, typeof(p))`) a retraction to use -* `initial_tangent_vector`: (`zero_vector(M, p)`) initialize any tangent vector data, -* `maxIterLanczos`: (`200`) a shortcut to set the stopping criterion in the sub solver, -* `ρ_regularization`: (`1e3`) a regularization to avoid dividing by zero for small values of cost and model -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(40) | `[`StopWhenGradientNormLess`](@ref)`(1e-9) | `[`StopWhenAllLanczosVectorsUsed`](@ref)`(maxIterLanczos)`) -* `sub_state`: [`LanczosState`](@ref)`(M, copy(M, p); maxIterLanczos=maxIterLanczos, σ=σ) +* `σ=100.0 / sqrt(manifold_dimension(M)`: initial regularization parameter +* `σmin=1e-10`: minimal regularization value ``σ_{\\min}`` +* `η1=0.1`: lower model success threshold +* `η2=0.9`: upper model success threshold +* `γ1=0.1`: regularization reduction factor (for the success case) +* `γ2=2.0`: regularization increment factor (for the non-success case) +* $_kw_evaluation_default: + $_kw_evaluation +* `initial_tangent_vector=zero_vector(M, p)`: initialize any tangent vector data, +* `maxIterLanczos=200`: a shortcut to set the stopping criterion in the sub solver, +* `ρ_regularization=1e3`: a regularization to avoid dividing by zero for small values of cost and model +* $_kw_retraction_method_default: + $_kw_retraction_method +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(40)`$_sc_any[`StopWhenGradientNormLess`](@ref)`(1e-9)`$_sc_any[`StopWhenAllLanczosVectorsUsed`](@ref)`(maxIterLanczos)`: + $_kw_stopping_criterion +* $_kw_sub_kwargs_default: + $_kw_sub_kwargs +* `sub_objective=nothing`: $_kw_sub_objective + $(_kw_sub_objective_default_text("AdaptiveRagularizationWithCubicsModelObjective")) +* `sub_state=`[`LanczosState`](@ref)`(M, copy(M, p); maxIterLanczos=maxIterLanczos, σ=σ)`: a state for the subproblem or an [`AbstractEvaluationType`](@ref) if the problem is a function. 
-* `sub_objective`: a shortcut to modify the objective of the subproblem used within in the -* `sub_problem`: [`DefaultManoptProblem`](@ref)`(M, sub_objective)` the problem (or a function) for the sub problem +* `sub_problem=`[`DefaultManoptProblem`](@ref)`(M, sub_objective)`: the problem (or a function) for the sub problem + +$_kw_others + +If you provide the [`ManifoldGradientObjective`](@ref) directly, the `evaluation=` keyword is ignored. +The decorations are still applied to the objective. -All other keyword arguments are passed to [`decorate_state!`](@ref) for state decorators or -[`decorate_objective!`](@ref) for objective, respectively. -If you provide the [`ManifoldGradientObjective`](@ref) directly, these decorations can still be specified +$_doc_remark_tutorial_debug -By default the `debug=` keyword is set to [`DebugIfEntry`](@ref)`(:ρ_denominator, >(0); message="Denominator nonpositive", type=:error)` -to avoid that by rounding errors the denominator in the computation of `ρ` gets nonpositive. +$_doc_sec_output """ + +@doc "$_doc_ARC" adaptive_regularization_with_cubics(M::AbstractManifold, args...; kwargs...) function adaptive_regularization_with_cubics( @@ -315,27 +346,7 @@ function adaptive_regularization_with_cubics( return adaptive_regularization_with_cubics!(M, mho, q; kwargs...) end -@doc raw""" - adaptive_regularization_with_cubics!(M, f, grad_f, Hess_f, p; kwargs...) - adaptive_regularization_with_cubics!(M, f, grad_f, p; kwargs...) - adaptive_regularization_with_cubics!(M, mho, p; kwargs...) - -evaluate the Riemannian adaptive regularization with cubics solver in place of `p`. 
- -# Input -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function ``F: \mathcal M → ℝ`` to minimize -* `grad_f`: the gradient ``\operatorname{grad}F: \mathcal M → T \mathcal M`` of ``F`` -* `Hess_f`: (optional) the Hessian ``H( \mathcal M, x, ξ)`` of ``F`` -* `p`: an initial value ``p ∈ \mathcal M`` - -For the case that no Hessian is provided, the Hessian is computed using finite difference, see -[`ApproxHessianFiniteDifference`](@ref). - -the cost `f` and its gradient and Hessian might also be provided as a [`ManifoldHessianObjective`](@ref) - -for more details and all options, see [`adaptive_regularization_with_cubics`](@ref). -""" +@doc "$_doc_ARC" adaptive_regularization_with_cubics!(M::AbstractManifold, args...; kwargs...) function adaptive_regularization_with_cubics!( M::AbstractManifold, @@ -376,15 +387,19 @@ function adaptive_regularization_with_cubics!( M::AbstractManifold, mho::O, p=rand(M); - debug=DebugIfEntry( - :ρ_denominator, >(-1e-8); message="denominator nonpositive", type=:error - ), + debug=if is_tutorial_mode() + DebugIfEntry( + :ρ_denominator, >(-1e-8); message="denominator nonpositive", type=:error + ) + else + [] + end, evaluation::AbstractEvaluationType=AllocatingEvaluation(), initial_tangent_vector::T=zero_vector(M, p), maxIterLanczos=min(300, manifold_dimension(M)), objective_type=:Riemannian, ρ_regularization::R=1e3, - retraction_method::AbstractRetractionMethod=default_retraction_method(M), + retraction_method::AbstractRetractionMethod=default_retraction_method(M, typeof(p)), σmin::R=1e-10, σ::R=100.0 / sqrt(manifold_dimension(M)), η1::R=0.1, @@ -454,7 +469,7 @@ function initialize_solver!(dmp::AbstractManoptProblem, arcs::AdaptiveRegulariza get_gradient!(dmp, arcs.X, arcs.p) return arcs end -function step_solver!(dmp::AbstractManoptProblem, arcs::AdaptiveRegularizationState, i) +function step_solver!(dmp::AbstractManoptProblem, arcs::AdaptiveRegularizationState, k) M = get_manifold(dmp) mho = get_objective(dmp) # Update sub state 
diff --git a/src/solvers/alternating_gradient_descent.jl b/src/solvers/alternating_gradient_descent.jl index 665c0d25b5..5c85d01327 100644 --- a/src/solvers/alternating_gradient_descent.jl +++ b/src/solvers/alternating_gradient_descent.jl @@ -220,9 +220,9 @@ function initialize_solver!( (shuffle!(agds.order)) return agds end -function step_solver!(amp::AbstractManoptProblem, agds::AlternatingGradientDescentState, i) +function step_solver!(amp::AbstractManoptProblem, agds::AlternatingGradientDescentState, k) M = get_manifold(amp) - step, agds.X = agds.direction(amp, agds, i) + step, agds.X = agds.direction(amp, agds, k) j = agds.order[agds.k] retract!(M[j], agds.p[M, j], agds.p[M, j], -step * agds.X[M, j]) agds.i += 1 diff --git a/src/solvers/augmented_Lagrangian_method.jl b/src/solvers/augmented_Lagrangian_method.jl index d25bd2dd59..fa5f68f94d 100644 --- a/src/solvers/augmented_Lagrangian_method.jl +++ b/src/solvers/augmented_Lagrangian_method.jl @@ -1,7 +1,9 @@ # # State # -@doc raw""" + +_sc_alm_default = "[`StopAfterIteration`](@ref)`(300)`$_sc_any([`StopWhenSmallerOrEqual](@ref)`(:ϵ, ϵ_min)`$_sc_all[`StopWhenChangeLess`](@ref)`(1e-10) )$_sc_any[`StopWhenChangeLess`](@ref)`" +@doc """ AugmentedLagrangianMethodState{P,T} <: AbstractManoptSolverState Describes the augmented Lagrangian method, with @@ -10,32 +12,54 @@ Describes the augmented Lagrangian method, with a default value is given in brackets if a parameter can be left out in initialization. 
-* `p`: a point on a manifold as starting point and current iterate -* `sub_problem`: an [`AbstractManoptProblem`](@ref) problem for the subsolver -* `sub_state`: an [`AbstractManoptSolverState`](@ref) for the subsolver -* `ϵ`: (`1e–3`) the accuracy tolerance -* `ϵ_min`: (`1e-6`) the lower bound for the accuracy tolerance -* `λ`: (`ones(n)`) the Lagrange multiplier with respect to the equality constraints -* `λ_max`: (`20.0`) an upper bound for the Lagrange multiplier belonging to the equality constraints -* `λ_min`: (`- λ_max`) a lower bound for the Lagrange multiplier belonging to the equality constraints -* `μ`: (`ones(m)`) the Lagrange multiplier with respect to the inequality constraints -* `μ_max`: (`20.0`) an upper bound for the Lagrange multiplier belonging to the inequality constraints -* `ρ`: (`1.0`) the penalty parameter -* `τ`: (`0.8`) factor for the improvement of the evaluation of the penalty parameter -* `θ_ρ`: (`0.3`) the scaling factor of the penalty parameter -* `θ_ϵ`: ((`(ϵ_min/ϵ)^(ϵ_exponent)`) the scaling factor of the accuracy tolerance -* `penalty`: evaluation of the current penalty term, initialized to `Inf`. -* `stop`: (`(`[`StopAfterIteration`](@ref)`(300) | (`[`StopWhenSmallerOrEqual`](@ref)`(ϵ, ϵ_min) & `[`StopWhenChangeLess`](@ref)`(1e-10))`) a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop. - +* `ϵ`: the accuracy tolerance +* `ϵ_min`: the lower bound for the accuracy tolerance +* `λ`: the Lagrange multiplier with respect to the equality constraints +* `λ_max`: an upper bound for the Lagrange multiplier belonging to the equality constraints +* `λ_min`: a lower bound for the Lagrange multiplier belonging to the equality constraints +* $_field_p +* `penalty`: evaluation of the current penalty term, initialized to `Inf`. 
+* `μ`: the Lagrange multiplier with respect to the inequality constraints
+* `μ_max`: an upper bound for the Lagrange multiplier belonging to the inequality constraints
+* `ρ`: the penalty parameter
+* $_field_sub_problem
+* $_field_sub_state
+* `τ`: factor for the improvement of the evaluation of the penalty parameter
+* `θ_ρ`: the scaling factor of the penalty parameter
+* `θ_ϵ`: the scaling factor of the accuracy tolerance
+* $_field_stop

 # Constructor

-    AugmentedLagrangianMethodState(M::AbstractManifold, co::ConstrainedManifoldObjective, p; kwargs...)
+    AugmentedLagrangianMethodState(
+        M::AbstractManifold,
+        co::ConstrainedManifoldObjective,
+        p,
+        sub_problem,
+        sub_state;
+        kwargs...
+    )

-construct an augmented Lagrangian method options with the fields and defaults as stated before,
-where the manifold `M` and the [`ConstrainedManifoldObjective`](@ref) `co` can be helpful for
+construct an augmented Lagrangian method state, where $(_arg_inline_M) and the [`ConstrainedManifoldObjective`](@ref) `co` are used for
 manifold- or objective specific defaults.

+## Keyword arguments
+
+The following keyword arguments are available to initialise the corresponding fields
+
+* `ϵ=1e-3`
+* `ϵ_min=1e-6`
+* `λ=ones(n)`: `n` is the number of equality constraints in the [`ConstrainedManifoldObjective`](@ref) `co`.
+* `λ_max=20.0`
+* `λ_min=- λ_max`
+* `μ=ones(m)`: `m` is the number of inequality constraints in the [`ConstrainedManifoldObjective`](@ref) `co`.
+* `μ_max=20.0`
+* `ρ=1.0`
+* `τ=0.8`
+* `θ_ρ=0.3`
+* `θ_ϵ=(ϵ_min/ϵ)^(ϵ_exponent)`
+* `stopping_criterion=`$_sc_alm_default.
+ # See also [`augmented_Lagrangian_method`](@ref) @@ -73,11 +97,11 @@ mutable struct AugmentedLagrangianMethodState{ sub_state::Union{AbstractEvaluationType,AbstractManoptSolverState}; ϵ::R=1e-3, ϵ_min::R=1e-6, + λ::V=ones(length(get_equality_constraint(M, co, p, :))), λ_max::R=20.0, λ_min::R=-λ_max, - μ_max::R=20.0, μ::V=ones(length(get_inequality_constraint(M, co, p, :))), - λ::V=ones(length(get_equality_constraint(M, co, p, :))), + μ_max::R=20.0, ρ::R=1.0, τ::R=0.8, θ_ρ::R=0.3, @@ -146,120 +170,157 @@ function show(io::IO, alms::AugmentedLagrangianMethodState) return print(io, s) end -@doc raw""" +_doc_alm_λ_update = raw""" +```math +λ_j^{(k+1)} =\operatorname{clip}_{[λ_{\min},λ_{\max}]} (λ_j^{(k)} + ρ^{(k)} h_j(p^{(k+1)})) \text{for all} j=1,…,p, +``` +""" +_doc_alm_μ_update = raw""" +```math +μ_i^{(k+1)} =\operatorname{clip}_{[0,μ_{\max}]} (μ_i^{(k)} + ρ^{(k)} g_i(p^{(k+1)})) \text{ for all } i=1,…,m, +``` +""" +_doc_alm_ε_update = raw""" +```math +ϵ^{(k)}=\max\{ϵ_{\min}, θ_ϵ ϵ^{(k-1)}\}, +``` +""" + +_doc_alm_σ = raw""" +```math +σ^{(k)}=\max_{j=1,…,p, i=1,…,m} \{\|h_j(p^{(k)})\|, \|\max_{i=1,…,m}\{g_i(p^{(k)}), -\frac{μ_i^{(k-1)}}{ρ^{(k-1)}} \}\| \}. +``` +""" + +_doc_alm_ρ_update = raw""" +```math +ρ^{(k)} = \begin{cases} +ρ^{(k-1)}/θ_ρ, & \text{if } σ^{(k)}\leq θ_ρ σ^{(k-1)} ,\\ +ρ^{(k-1)}, & \text{else,} +\end{cases} +``` +""" + +_doc_alm = """ augmented_Lagrangian_method(M, f, grad_f, p=rand(M); kwargs...) augmented_Lagrangian_method(M, cmo::ConstrainedManifoldObjective, p=rand(M); kwargs...) + augmented_Lagrangian_method!(M, f, grad_f, p; kwargs...) + augmented_Lagrangian_method!(M, cmo::ConstrainedManifoldObjective, p; kwargs...) perform the augmented Lagrangian method (ALM) [LiuBoumal:2019](@cite). +This method can work in-place of `p`. 
The aim of the ALM is to find the solution of the constrained optimisation task -```math -\begin{aligned} -\min_{p ∈\mathcal{M}} &f(p)\\ -\text{subject to } &g_i(p)\leq 0 \quad \text{ for } i= 1, …, m,\\ -\quad &h_j(p)=0 \quad \text{ for } j=1,…,n, -\end{aligned} -``` +$_problem_constrained -where `M` is a Riemannian manifold, and ``f``, ``\{g_i\}_{i=1}^m`` and ``\{h_j\}_{j=1}^p`` are twice continuously differentiable functions from `M` to ℝ. +where `M` is a Riemannian manifold, and ``f``, ``$(_math_sequence("g", "i", "1", "n"))`` and ``$(_math_sequence("h", "j", "1", "m")) +are twice continuously differentiable functions from `M` to ℝ. In every step ``k`` of the algorithm, the [`AugmentedLagrangianCost`](@ref) -``\mathcal{L}_{ρ^{(k-1)}}(p, μ^{(k-1)}, λ^{(k-1)})`` is minimized on ``\mathcal{M}``, -where ``μ^{(k-1)} ∈ \mathbb R^n`` and ``λ^{(k-1)} ∈ ℝ^m`` are the current iterates of the Lagrange multipliers and ``ρ^{(k-1)}`` is the current penalty parameter. + ``$(_doc_al_Cost("k"))`` is minimized on $_l_M, + where ``μ^{(k)} ∈ ℝ^n`` and ``λ^{(k)} ∈ ℝ^m`` are the current iterates of the Lagrange multipliers and ``ρ^{(k)}`` is the current penalty parameter. The Lagrange multipliers are then updated by -```math -λ_j^{(k)} =\operatorname{clip}_{[λ_{\min},λ_{\max}]} (λ_j^{(k-1)} + ρ^{(k-1)} h_j(p^{(k)})) \text{for all} j=1,…,p, -``` +$_doc_alm_λ_update and -```math -μ_i^{(k)} =\operatorname{clip}_{[0,μ_{\max}]} (μ_i^{(k-1)} + ρ^{(k-1)} g_i(p^{(k)})) \text{ for all } i=1,…,m, -``` +$_doc_alm_μ_update -where ``λ_{\min} \leq λ_{\max}`` and ``μ_{\max}`` are the multiplier boundaries. + where ``λ_{$_l_min} ≤ λ_{$_l_max}`` and ``μ_{$_l_max}`` are the multiplier boundaries. Next, the accuracy tolerance ``ϵ`` is updated as -```math -ϵ^{(k)}=\max\{ϵ_{\min}, θ_ϵ ϵ^{(k-1)}\}, -``` +$_doc_alm_ε_update -where ``ϵ_{\min}`` is the lowest value ``ϵ`` is allowed to become and ``θ_ϵ ∈ (0,1)`` is constant scaling factor. 
+ where ``ϵ_{$_l_min}`` is the lowest value ``ϵ`` is allowed to become and ``θ_ϵ ∈ (0,1)`` is constant scaling factor. Last, the penalty parameter ``ρ`` is updated as follows: with -```math -σ^{(k)}=\max_{j=1,…,p, i=1,…,m} \{\|h_j(p^{(k)})\|, \|\max_{i=1,…,m}\{g_i(p^{(k)}), -\frac{μ_i^{(k-1)}}{ρ^{(k-1)}} \}\| \}. -``` +$_doc_alm_σ `ρ` is updated as -```math -ρ^{(k)} = \begin{cases} -ρ^{(k-1)}/θ_ρ, & \text{if } σ^{(k)}\leq θ_ρ σ^{(k-1)} ,\\ -ρ^{(k-1)}, & \text{else,} -\end{cases} -``` +$_doc_alm_ρ_update where ``θ_ρ ∈ (0,1)`` is a constant scaling factor. # Input -* `M` a manifold ``\mathcal M`` -* `f` a cost function ``F:\mathcal M→ℝ`` to minimize -* `grad_f` the gradient of the cost function +$_arg_M +$_arg_f +$_arg_grad_f # Optional (if not called with the [`ConstrainedManifoldObjective`](@ref) `cmo`) -* `g`: (`nothing`) the inequality constraints -* `h`: (`nothing`) the equality constraints -* `grad_g`: (`nothing`) the gradient of the inequality constraints -* `grad_h`: (`nothing`) the gradient of the equality constraints +* `g=nothing`: the inequality constraints +* `h=nothing`: the equality constraints +* `grad_g=nothing`: the gradient of the inequality constraints +* `grad_h=nothing`: the gradient of the equality constraints Note that one of the pairs (`g`, `grad_g`) or (`h`, `grad_h`) has to be provided. Otherwise the problem is not constrained and a better solver would be for example [`quasi_Newton`](@ref). 
-# Optional - -* `ϵ`: (`1e-3`) the accuracy tolerance -* `ϵ_min`: (`1e-6`) the lower bound for the accuracy tolerance -* `ϵ_exponent`: (`1/100`) exponent of the ϵ update factor; - also 1/number of iterations until maximal accuracy is needed to end algorithm naturally -* `θ_ϵ`: (`(ϵ_min / ϵ)^(ϵ_exponent)`) the scaling factor of the exactness -* `μ`: (`ones(size(h(M,x),1))`) the Lagrange multiplier with respect to the inequality constraints -* `μ_max`: (`20.0`) an upper bound for the Lagrange multiplier belonging to the inequality constraints -* `λ`: (`ones(size(h(M,x),1))`) the Lagrange multiplier with respect to the equality constraints -* `λ_max`: (`20.0`) an upper bound for the Lagrange multiplier belonging to the equality constraints -* `λ_min`: (`- λ_max`) a lower bound for the Lagrange multiplier belonging to the equality constraints -* `τ`: (`0.8`) factor for the improvement of the evaluation of the penalty parameter -* `ρ`: (`1.0`) the penalty parameter -* `θ_ρ`: (`0.3`) the scaling factor of the penalty parameter -* `equality_constraints`: (`nothing`) the number ``n`` of equality constraints. -* `gradient_range` (`nothing`, equivalent to [`NestedPowerRepresentation`](@extref) specify how gradients are represented -* `gradient_equality_range`: (`gradient_range`) specify how the gradients of the equality constraints are represented -* `gradient_inequality_range`: (`gradient_range`) specify how the gradients of the inequality constraints are represented -* `inequality_constraints`: (`nothing`) the number ``m`` of inequality constraints. -* `sub_grad`: ([`AugmentedLagrangianGrad`](@ref)`(problem, ρ, μ, λ)`) use augmented Lagrangian gradient, especially with the same numbers `ρ,μ` as in the options for the sub problem -* `sub_kwargs`: (`(;)`) keyword arguments to decorate the sub options, for example the `debug=` keyword. 
-* `sub_stopping_criterion`: ([`StopAfterIteration`](@ref)`(200) | `[`StopWhenGradientNormLess`](@ref)`(ϵ) | `[`StopWhenStepsizeLess`](@ref)`(1e-8)`) specify a stopping criterion for the subsolver. -* `sub_problem`: ([`DefaultManoptProblem`](@ref)`(M, `[`ConstrainedManifoldObjective`](@ref)`(subcost, subgrad; evaluation=evaluation))`) problem for the subsolver -* `sub_state`: ([`QuasiNewtonState`](@ref)) using [`QuasiNewtonLimitedMemoryDirectionUpdate`](@ref) with [`InverseBFGS`](@ref) and `sub_stopping_criterion` as a stopping criterion. See also `sub_kwargs`. -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(300)` | ([`StopWhenSmallerOrEqual`](@ref)`(ϵ, ϵ_min)` & [`StopWhenChangeLess`](@ref)`(1e-10))`) a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop. +# Keyword Arguments + +* $_kw_evaluation_default: $_kw_evaluation + +* `ϵ=1e-3`: the accuracy tolerance +* `ϵ_min=1e-6`: the lower bound for the accuracy tolerance +* `ϵ_exponent=1/100`: exponent of the ϵ update factor; + also 1/number of iterations until maximal accuracy is needed to end algorithm naturally + + * `equality_constraints=nothing`: the number ``n`` of equality constraints. + If not provided, a call to the gradient of `g` is performed to estimate these. + +* `gradient_range=nothing`: specify how both gradients of the constraints are represented + +* `gradient_equality_range=gradient_range`: + specify how gradients of the equality constraints are represented, see [`VectorGradientFunction`](@ref). + +* `gradient_inequality_range=gradient_range`: + specify how gradients of the inequality constraints are represented, see [`VectorGradientFunction`](@ref). + +* `inequality_constraints=nothing`: the number ``m`` of inequality constraints. + If not provided, a call to the gradient of `g` is performed to estimate these. 
+
+* `λ=ones(size(h(M,x),1))`: the Lagrange multiplier with respect to the equality constraints
+* `λ_max=20.0`: an upper bound for the Lagrange multiplier belonging to the equality constraints
+* `λ_min=- λ_max`: a lower bound for the Lagrange multiplier belonging to the equality constraints
+
+* `μ=ones(size(h(M,x),1))`: the Lagrange multiplier with respect to the inequality constraints
+* `μ_max=20.0`: an upper bound for the Lagrange multiplier belonging to the inequality constraints
+
+* `ρ=1.0`: the penalty parameter
+* `τ=0.8`: factor for the improvement of the evaluation of the penalty parameter
+* `θ_ρ=0.3`: the scaling factor of the penalty parameter
+* `θ_ϵ=(ϵ_min / ϵ)^(ϵ_exponent)`: the scaling factor of the exactness
+
+* `sub_cost=`[`AugmentedLagrangianCost`](@ref)`(cmo, ρ, μ, λ)`: use augmented Lagrangian cost, based on the [`ConstrainedManifoldObjective`](@ref) built from the functions provided.
+  $(_kw_used_in("sub_problem"))
+
+* `sub_grad=`[`AugmentedLagrangianGrad`](@ref)`(cmo, ρ, μ, λ)`: use augmented Lagrangian gradient, based on the [`ConstrainedManifoldObjective`](@ref) built from the functions provided.
+  $(_kw_used_in("sub_problem"))
+
+* $_kw_sub_kwargs_default: $_kw_sub_kwargs
+
+* `sub_problem=`[`DefaultManoptProblem`](@ref)`(M, `[`ConstrainedManifoldObjective`](@ref)`(subcost, subgrad; evaluation=evaluation))`:
+  problem for the subsolver
+* `sub_state=`[`QuasiNewtonState`](@ref) using [`QuasiNewtonLimitedMemoryDirectionUpdate`](@ref) with [`InverseBFGS`](@ref) and `sub_stopping_criterion` as a stopping criterion.
+  See also `sub_kwargs=`.
+
+* `stopping_criterion=$_sc_alm_default`: $_kw_stopping_criterion

 For the `range`s of the constraints' gradient, other power manifold tangent space representations,
 mainly the [`ArrayPowerRepresentation`](@extref Manifolds :jl:type:`Manifolds.ArrayPowerRepresentation`)
 can be used if the gradients can be computed more efficiently in that representation.
-With `equality_constraints` and `inequality_constraints` you have to provide the dimension -of the ranges of `h` and `g`, respectively. If not provided, together with `M` and the start point `p0`, -a call to either of these is performed to try to infer these. - -# Output +$(_kw_others) -the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details +$_doc_sec_output """ + +@doc "$(_doc_alm)" function augmented_Lagrangian_method( M::AbstractManifold, f, @@ -330,13 +391,7 @@ function augmented_Lagrangian_method( return (typeof(q) == typeof(rs)) ? rs[] : rs end -@doc raw""" - augmented_Lagrangian_method!(M, f, grad_f, p=rand(M); kwargs...) - -perform the augmented Lagrangian method (ALM) in-place of `p`. - -For all options, see [`augmented_Lagrangian_method`](@ref). -""" +@doc "$(_doc_alm)" function augmented_Lagrangian_method!( M::AbstractManifold, f::TF, @@ -528,6 +583,6 @@ function step_solver!(mp::AbstractManoptProblem, alms::AugmentedLagrangianMethod end get_solver_result(alms::AugmentedLagrangianMethodState) = alms.p -function get_last_stepsize(::AbstractManoptProblem, s::AugmentedLagrangianMethodState, i) +function get_last_stepsize(::AbstractManoptProblem, s::AugmentedLagrangianMethodState, k) return s.last_stepsize end diff --git a/src/solvers/cma_es.jl b/src/solvers/cma_es.jl index 5e57298a57..2b97d8fe99 100644 --- a/src/solvers/cma_es.jl +++ b/src/solvers/cma_es.jl @@ -335,7 +335,7 @@ function step_solver!(mp::AbstractManoptProblem, s::CMAESState, iteration::Int) return s end -@doc raw""" +@doc """ cma_es(M, f, p_m=rand(M); σ::Real=1.0, kwargs...) Perform covariance matrix adaptation evolutionary strategy search for global gradient-free @@ -347,31 +347,30 @@ setting. 
# Input -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function ``f: \mathcal M→ℝ`` to find a minimizer ``p^*`` for +* `M`: a manifold ``$(_l_M) M`` +* `f`: a cost function ``f: $(_l_M)→ℝ`` to find a minimizer ``p^*`` for # Optional -* `p_m`: (`rand(M)`) an initial point `p` -* `σ`: (`1.0`) initial standard deviation +* `p_m=`$(_link_rand()): an initial point `p` +* `σ=1.0`: initial standard deviation * `λ`: (`4 + Int(floor(3 * log(manifold_dimension(M))))`population size (can be increased for a more thorough global search but decreasing is not recommended) -* `tol_fun`: (`1e-12`) tolerance for the `StopWhenPopulationCostConcentrated`, similar to +* `tol_fun=1e-12`: tolerance for the `StopWhenPopulationCostConcentrated`, similar to absolute difference between function values at subsequent points -* `tol_x`: (`1e-12`) tolerance for the `StopWhenPopulationStronglyConcentrated`, similar to +* `tol_x=1e-12`: tolerance for the `StopWhenPopulationStronglyConcentrated`, similar to absolute difference between subsequent point but actually computed from distribution parameters. -* `stopping_criterion`: (`default_cma_es_stopping_criterion(M, λ; tol_fun=tol_fun, tol_x=tol_x)`) -* `retraction_method`: (`default_retraction_method(M, typeof(p_m))`) -* `vector_transport_method`: (`default_vector_transport_method(M, typeof(p_m))`) +* `stopping_criterion=default_cma_es_stopping_criterion(M, λ; tol_fun=tol_fun, tol_x=tol_x)`: +* `retraction_method=default_retraction_method(M, typeof(p_m))`: +* `vector_transport_method=default_vector_transport_method(M, typeof(p_m))`: * `basis` (`DefaultOrthonormalBasis()`) basis used to represent covariance in -* `rng`: (`default_rng()`) random number generator for generating new points +* `rng=default_rng()`: random number generator for generating new points on `M` -# Output +$(_kw_others) -the obtained (approximate) minimizer ``p^*``. -To obtain the whole final state of the solver, see [`get_solver_return`](@ref) for details. 
+$(_doc_sec_output) """ function cma_es(M::AbstractManifold, f; kwargs...) mco = ManifoldCostObjective(f) @@ -537,15 +536,15 @@ end indicates_convergence(c::StopWhenCovarianceIllConditioned) = false is_active_stopping_criterion(c::StopWhenCovarianceIllConditioned) = c.at_iteration > 0 function (c::StopWhenCovarianceIllConditioned)( - ::AbstractManoptProblem, s::CMAESState, i::Int + ::AbstractManoptProblem, s::CMAESState, k::Int ) - if i == 0 # reset on init + if k == 0 # reset on init c.at_iteration = -1 return false end c.last_cond = s.covariance_matrix_cond - if i > 0 && c.last_cond > c.threshold - c.at_iteration = i + if k > 0 && c.last_cond > c.threshold + c.at_iteration = k return true end return false @@ -593,15 +592,15 @@ function is_active_stopping_criterion(c::StopWhenBestCostInGenerationConstant) return c.iterations_since_change >= c.iteration_range end function (c::StopWhenBestCostInGenerationConstant)( - ::AbstractManoptProblem, s::CMAESState, i::Int + ::AbstractManoptProblem, s::CMAESState, k::Int ) - if i == 0 # reset on init + if k == 0 # reset on init c.at_iteration = -1 c.best_objective_at_last_change = Inf return false end if c.iterations_since_change >= c.iteration_range - c.at_iteration = i + c.at_iteration = k return true else if c.best_objective_at_last_change != s.best_fitness_current_gen @@ -676,15 +675,15 @@ function is_active_stopping_criterion(c::StopWhenEvolutionStagnates) median(c.median_history[1:thr_low]) <= median(c.median_history[thr_high:end]) return best_stagnant && median_stagnant end -function (c::StopWhenEvolutionStagnates)(::AbstractManoptProblem, s::CMAESState, i::Int) - if i == 0 # reset on init +function (c::StopWhenEvolutionStagnates)(::AbstractManoptProblem, s::CMAESState, k::Int) + if k == 0 # reset on init empty!(c.best_history) empty!(c.median_history) c.at_iteration = -1 return false end if is_active_stopping_criterion(c) - c.at_iteration = i + c.at_iteration = k return true else push!(c.best_history, 
s.best_fitness_current_gen) @@ -729,7 +728,7 @@ norm of `σ * p_c` is smaller than `tol`. This corresponds to `TolX` condition f # Fields -* `tol` the tolerance to check against +* `tol` the tolerance to verify against * `at_iteration` an internal field to indicate at with iteration ``i \geq 0`` the tolerance was met. # Constructor @@ -750,16 +749,16 @@ function is_active_stopping_criterion(c::StopWhenPopulationStronglyConcentrated) return c.at_iteration >= 0 end function (c::StopWhenPopulationStronglyConcentrated)( - ::AbstractManoptProblem, s::CMAESState, i::Int + ::AbstractManoptProblem, s::CMAESState, k::Int ) - if i == 0 # reset on init + if k == 0 # reset on init c.at_iteration = -1 return false end norm_inf_dev = norm(s.deviations, Inf) norm_inf_p_c = norm(s.p_c, Inf) if norm_inf_dev < c.tol && s.σ * norm_inf_p_c < c.tol - c.at_iteration = i + c.at_iteration = k return true end return false @@ -801,14 +800,14 @@ indicates_convergence(c::StopWhenPopulationDiverges) = false function is_active_stopping_criterion(c::StopWhenPopulationDiverges) return c.at_iteration >= 0 end -function (c::StopWhenPopulationDiverges)(::AbstractManoptProblem, s::CMAESState, i::Int) - if i == 0 # reset on init +function (c::StopWhenPopulationDiverges)(::AbstractManoptProblem, s::CMAESState, k::Int) + if k == 0 # reset on init c.at_iteration = -1 return false end cur_σ_times_maxstddev = s.σ * maximum(s.deviations) if cur_σ_times_maxstddev / c.last_σ_times_maxstddev > c.tol - c.at_iteration = i + c.at_iteration = k return true end return false @@ -856,9 +855,9 @@ function is_active_stopping_criterion(c::StopWhenPopulationCostConcentrated) return c.at_iteration >= 0 end function (c::StopWhenPopulationCostConcentrated)( - ::AbstractManoptProblem, s::CMAESState, i::Int + ::AbstractManoptProblem, s::CMAESState, k::Int ) - if i == 0 # reset on init + if k == 0 # reset on init c.at_iteration = -1 return false end @@ -867,7 +866,7 @@ function (c::StopWhenPopulationCostConcentrated)( min_hist, 
max_hist = extrema(c.best_value_history) if max_hist - min_hist < c.tol && s.best_fitness_current_gen - s.worst_fitness_current_gen < c.tol - c.at_iteration = i + c.at_iteration = k return true end end diff --git a/src/solvers/conjugate_gradient_descent.jl b/src/solvers/conjugate_gradient_descent.jl index b0b842f726..55af6db4f9 100644 --- a/src/solvers/conjugate_gradient_descent.jl +++ b/src/solvers/conjugate_gradient_descent.jl @@ -28,19 +28,30 @@ function show(io::IO, cgds::ConjugateGradientDescentState) return print(io, s) end -@doc raw""" - conjugate_gradient_descent(M, F, gradF, p=rand(M)) - conjugate_gradient_descent(M, gradient_objective, p) - -perform a conjugate gradient based descent - +_doc_CG_formula = raw""" ````math p_{k+1} = \operatorname{retr}_{p_k} \bigl( s_kδ_k \bigr), ```` +""" +_doc_update_delta_k = raw""" +````math +\delta_k=\operatorname{grad}f(p_k) + β_k \delta_{k-1} +```` +""" -where ``\operatorname{retr}`` denotes a retraction on the `Manifold` `M` +_doc_CG = """ + conjugate_gradient_descent(M, f, grad_f, p=rand(M)) + conjugate_gradient_descent!(M, f, grad_f, p) + conjugate_gradient_descent(M, gradient_objective, p) + conjugate_gradient_descent!(M, gradient_objective, p; kwargs...) + +perform a conjugate gradient based descent- + +$(_doc_CG_formula) + +where ``$(_l_retr)`` denotes a retraction on the `Manifold` `M` and one can employ different rules to update the descent direction ``δ_k`` based on -the last direction ``δ_{k-1}`` and both gradients ``\operatorname{grad}f(x_k)``,``\operatorname{grad}f(x_{k-1})``. +the last direction ``δ_{k-1}`` and both gradients ``$(_l_grad)f(x_k)``,``$(_l_grad) f(x_{k-1})``. The [`Stepsize`](@ref) ``s_k`` may be determined by a [`Linesearch`](@ref). Alternatively to `f` and `grad_f` you can provide @@ -54,39 +65,36 @@ These can all be combined with a [`ConjugateGradientBealeRestart`](@ref) rule. 
They all compute ``β_k`` such that this algorithm updates the search direction as -````math -\delta_k=\operatorname{grad}f(p_k) + β_k \delta_{k-1} -```` +$(_doc_update_delta_k) # Input -* `M` a manifold ``\mathcal M`` -* `f` a cost function ``F:\mathcal M→ℝ`` to minimize implemented as a function `(M,p) -> v` -* `grad_f` the gradient ``\operatorname{grad}F:\mathcal M → T\mathcal M`` of ``F`` implemented also as `(M,x) -> X` -* `p` an initial value ``x∈\mathcal M`` +$(_arg_M) +$(_arg_f) +$(_arg_grad_f) +$(_arg_p) -# Optional +# Keyword arguments -* `coefficient`: ([`ConjugateDescentCoefficient`](@ref) `<:` [`DirectionUpdateRule`](@ref)) +* `coefficient::DirectionUpdateRule=[`ConjugateDescentCoefficient`](@ref)`()`: rule to compute the descent direction update coefficient ``β_k``, as a functor, where - the resulting function maps are `(amp, cgs, i) -> β` with `amp` an [`AbstractManoptProblem`](@ref), + the resulting function maps are `(amp, cgs, k) -> β` with `amp` an [`AbstractManoptProblem`](@ref), `cgs` is the [`ConjugateGradientDescentState`](@ref), and `i` is the current iterate. -* `evaluation`: ([`AllocatingEvaluation`](@ref)) specify whether the gradient works by allocation (default) form `gradF(M, x)` - or [`InplaceEvaluation`](@ref) in place of the form `gradF!(M, X, x)`. -* `retraction_method`: (`default_retraction_method(M, typeof(p))`) a retraction method to use. -* `stepsize`: ([`ArmijoLinesearch`](@ref) via [`default_stepsize`](@ref)) A [`Stepsize`](@ref) function applied to the - search direction. The default is a constant step size 1. -* `stopping_criterion`: (`stopWhenAny( stopAtIteration(200), stopGradientNormLess(10.0^-8))`) - a function indicating when to stop. -* `vector_transport_method`: (`default_vector_transport_method(M, typeof(p))`) vector transport method to transport - the old descent direction when computing the new descent direction. - -If you provide the [`ManifoldGradientObjective`](@ref) directly, `evaluation` is ignored. 
- -# Output - -the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details +* $(_kw_evaluation_default): $(_kw_evaluation) +* $(_kw_retraction_method_default): $(_kw_retraction_method) +* `stepsize=[`ArmijoLinesearch`](@ref)`(M)`: $_kw_stepsize + via [`default_stepsize`](@ref)) passing on the `default_retraction_method` +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(500)`$(_sc_any)[`StopWhenGradientNormLess`](@ref)`(1e-8)`: + $(_kw_stopping_criterion) +* $(_kw_vector_transport_method_default): $(_kw_vector_transport_method) + +If you provide the [`ManifoldGradientObjective`](@ref) directly, the `evaluation=` keyword is ignored. +The decorations are still applied to the objective. + +$(_doc_sec_output) """ + +@doc "$(_doc_CG)" conjugate_gradient_descent(M::AbstractManifold, args...; kwargs...) function conjugate_gradient_descent(M::AbstractManifold, f, grad_f; kwargs...) return conjugate_gradient_descent(M, f, grad_f, rand(M); kwargs...) @@ -120,29 +128,7 @@ function conjugate_gradient_descent( return conjugate_gradient_descent!(M, mgo, q; kwargs...) end -@doc raw""" - conjugate_gradient_descent!(M, F, gradF, x) - conjugate_gradient_descent!(M, gradient_objective, p; kwargs...) - -perform a conjugate gradient based descent in place of `x` as - -````math -p_{k+1} = \operatorname{retr}_{p_k} \bigl( s_k\delta_k \bigr), -```` -where ``\operatorname{retr}`` denotes a retraction on the `Manifold` `M` - -# Input -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function ``F:\mathcal M→ℝ`` to minimize -* `grad_f`: the gradient ``\operatorname{grad}F:\mathcal M→ T\mathcal M`` of F -* `p`: an initial value ``p∈\mathcal M`` - -Alternatively to `f` and `grad_f` you can provide -the [`AbstractManifoldGradientObjective`](@ref) `gradient_objective` directly. - -for more details and options, especially the [`DirectionUpdateRule`](@ref)s, -see [`conjugate_gradient_descent`](@ref). 
-""" +@doc "$(_doc_CG)" conjugate_gradient_descent!(M::AbstractManifold, params...; kwargs...) function conjugate_gradient_descent!( M::AbstractManifold, @@ -195,13 +181,13 @@ function initialize_solver!(amp::AbstractManoptProblem, cgs::ConjugateGradientDe cgs.β = 0.0 return cgs end -function step_solver!(amp::AbstractManoptProblem, cgs::ConjugateGradientDescentState, i) +function step_solver!(amp::AbstractManoptProblem, cgs::ConjugateGradientDescentState, k) M = get_manifold(amp) copyto!(M, cgs.p_old, cgs.p) - current_stepsize = get_stepsize(amp, cgs, i, cgs.δ) + current_stepsize = get_stepsize(amp, cgs, k, cgs.δ) retract!(M, cgs.p, cgs.p, cgs.δ, current_stepsize, cgs.retraction_method) get_gradient!(amp, cgs.X, cgs.p) - cgs.β = cgs.coefficient(amp, cgs, i) + cgs.β = cgs.coefficient(amp, cgs, k) vector_transport_to!(M, cgs.δ, cgs.p_old, cgs.δ, cgs.p, cgs.vector_transport_method) cgs.δ .*= cgs.β cgs.δ .-= cgs.X diff --git a/src/solvers/conjugate_residual.jl b/src/solvers/conjugate_residual.jl index 414b4d50dc..2245f5d145 100644 --- a/src/solvers/conjugate_residual.jl +++ b/src/solvers/conjugate_residual.jl @@ -1,34 +1,34 @@ -@doc raw""" +@doc """ conjugate_residual(TpM::TangentSpace, A, b, p=rand(TpM)) conjugate_residual(TpM::TangentSpace, slso::SymmetricLinearSystemObjective, p=rand(TpM)) conjugate_residual!(TpM::TangentSpace, A, b, p) conjugate_residual!(TpM::TangentSpace, slso::SymmetricLinearSystemObjective, p) -Compute the solution of ``\mathcal A(p)[X] + b(p) = 0_p ``, where +Compute the solution of ``$(_l_cal("A"))(p)[X] + b(p) = 0_p ``, where -* ``\mathcal A`` is a linear, symmetric operator on ``T_p\mathcal M`` +* ``$(_l_cal("A"))`` is a linear, symmetric operator on ``$(_l_TpM)`` * ``b`` is a vector field on the manifold -* ``X ∈ T_p\mathcal M`` is a tangent vector -* ``0_p`` is the zero vector ``T_p\mathcal M``. +* ``X ∈ $(_l_TpM)`` is a tangent vector +* ``0_p`` is the zero vector ``$(_l_TpM)``. 
This implementation follows Algorithm 3 in [LaiYoshise:2024](@cite) and is initalised with ``X^{(0)}`` as the zero vector and -* the initial residual ``r^{(0)} = -b(p) - \mathcal A(p)[X^{(0)}]`` +* the initial residual ``r^{(0)} = -b(p) - $(_l_cal("A"))(p)[X^{(0)}]`` * the initial conjugate direction ``d^{(0)} = r^{(0)}`` -* initialize ``Y^{(0)} = \mathcal A(p)[X^{(0)}]`` +* initialize ``Y^{(0)} = $(_l_cal("A"))(p)[X^{(0)}]`` performed the following steps at iteration ``k=0,…`` until the `stopping_criterion` is fulfilled. -1. compute a step size ``α_k = \displaystyle\frac{\langle r^{(k)}, \mathcal A(p)[r^{(k)}] \rangle_p}{\langle \mathcal A(p)[d^{(k)}], \mathcal A(p)[d^{(k)}] \rangle_p}`` +1. compute a step size ``α_k = $(_l_ds)$(_l_frac("⟨ r^{(k)}, $(_l_cal("A"))(p)[r^{(k)}] ⟩_p","⟨ $(_l_cal("A"))(p)[d^{(k)}], $(_l_cal("A"))(p)[d^{(k)}] ⟩_p"))`` 2. do a step ``X^{(k+1)} = X^{(k)} + α_kd^{(k)}`` 2. update the residual ``r^{(k+1)} = r^{(k)} + α_k Y^{(k)}`` -4. compute ``Z = \mathcal A(p)[r^{(k+1)}]`` -5. Update the conjugate coefficient ``β_k = \displaystyle\frac{\langle r^{(k+1)}, \mathcal A(p)[r^{(k+1)}] \rangle_p}{\langle r^{(k)}, \mathcal A(p)[r^{(k)}] \rangle_p}`` +4. compute ``Z = $(_l_cal("A"))(p)[r^{(k+1)}]`` +5. Update the conjugate coefficient ``β_k = $(_l_ds)$(_l_frac("⟨ r^{(k+1)}, $(_l_cal("A"))(p)[r^{(k+1)}] ⟩_p", "⟨ r^{(k)}, $(_l_cal("A"))(p)[r^{(k)}] ⟩_p"))`` 6. Update the conjugate direction ``d^{(k+1)} = r^{(k+1)} + β_kd^{(k)}`` 7. 
Update ``Y^{(k+1)} = -Z + β_k Y^{(k)}`` -Note that the right hand side of Step 7 is the same as evaluating ``\mathcal A[d^{(k+1)]``, but avoids the actual evaluation +Note that the right hand side of Step 7 is the same as evaluating ``$(_l_cal("A"))[d^{(k+1)}]``, but avoids the actual evaluation # Input @@ -40,8 +40,8 @@ Note that the right hand side of Step 7 is the same as evaluating ``\mathcal A[d # Keyword arguments * `evaluation=`[`AllocatingEvaluation`](@ref) specify whether `A` and `b` are implemented allocating or in-place -* `stopping_criterion::`[`StoppingCriterion`](@ref)`=`[`StopAfterIteration`](@ref)`(`[`manifold_dimension`](@extref ManifoldsBase.manifold_dimension-Tuple{AbstractManifold})`(TpM))`[` | `](@ref StopWhenAny)[`StopWhenRelativeResidualLess`](@ref)`(c,1e-8)`, - where `c` is the norm of ``\lVert b \rVert``. +* `stopping_criterion::`[`StoppingCriterion`](@ref)`=`[`StopAfterIteration`](@ref)`(`$(_link_manifold_dimension())$_sc_any[`StopWhenRelativeResidualLess`](@ref)`(c,1e-8)`, + where `c` is the norm of ``$(_l_norm("b"))``. 
# Output @@ -74,7 +74,7 @@ function conjugate_residual!( X; stopping_criterion::SC=StopAfterIteration(manifold_dimension(TpM)) | StopWhenRelativeResidualLess( - norm(base_manifold(TpM), base_point(TpM), get_b(TpM, slso, X)), 1e-8 + norm(base_manifold(TpM), base_point(TpM), get_b(TpM, slso)), 1e-8 ), kwargs..., ) where {SC<:StoppingCriterion} @@ -94,7 +94,7 @@ function initialize_solver!( TpM = get_manifold(amp) get_hessian!(TpM, crs.r, get_objective(amp), base_point(TpM), crs.X) crs.r .*= -1 - crs.r .-= get_b(TpM, get_objective(amp), crs.X) + crs.r .-= get_b(TpM, get_objective(amp)) copyto!(TpM, crs.d, crs.r) get_hessian!(amp, crs.Ar, crs.X, crs.r) copyto!(TpM, crs.Ad, crs.Ar) diff --git a/src/solvers/convex_bundle_method.jl b/src/solvers/convex_bundle_method.jl index de3c1c292f..93155be188 100644 --- a/src/solvers/convex_bundle_method.jl +++ b/src/solvers/convex_bundle_method.jl @@ -1,43 +1,65 @@ -@doc raw""" +@doc """ ConvexBundleMethodState <: AbstractManoptSolverState Stores option values for a [`convex_bundle_method`](@ref) solver. # Fields -* `atol_λ`: (`eps()`) tolerance parameter for the convex coefficients in λ -* `atol_errors`: (`eps()`) tolerance parameter for the linearization errors -* `bundle`: bundle that collects each iterate with the computed subgradient at the iterate -* `bundle_cap`: (`25`) the maximal number of elements the bundle is allowed to remember -* `diameter`: (`50.0`) estimate for the diameter of the level set of the objective function at the starting point -* `domain`: (`(M, p) -> isfinite(f(M, p))`) a function to that evaluates - to true when the current candidate is in the domain of the objective `f`, and false otherwise, - for example `domain = (M, p) -> p ∈ dom f(M, p) ? 
true : false`
-* `g`: descent direction
-* `inverse_retraction_method`: the inverse retraction to use within
-* `k_max`: upper bound on the sectional curvature of the manifold
-* `linearization_errors`: linearization errors at the last serious step
-* `m`: (`1e-3`) the parameter to test the decrease of the cost: ``f(q_{k+1}) \le f(p_k) + m \xi``.
-* `p`: current candidate point
-* `p_last_serious`: last serious iterate
-* `retraction_method`: the retraction to use within
-* `stop`: a [`StoppingCriterion`](@ref)
+The following fields require a (real) number type `R`, as well as
+point type `P` and a tangent vector type `T`
+
+* `atol_λ::R`: tolerance parameter for the convex coefficients in λ
+* `atol_errors::R`: tolerance parameter for the linearization errors
+* `bundle<:AbstractVector{Tuple{<:P,<:T}}`: bundle that collects each iterate with the computed subgradient at the iterate
+* `bundle_cap::Int`: the maximal number of elements the bundle is allowed to remember
+* `diameter::R`: estimate for the diameter of the level set of the objective function at the starting point
+* `domain`: the domain of ``f`` as a function `(M,p) -> b` that evaluates to true when the current candidate is in the domain of `f`, and false otherwise,
+* `g::T`: descent direction
+* $(_field_inv_retr)
+* `k_max::R`: upper bound on the sectional curvature of the manifold
+* `linearization_errors<:AbstractVector{<:R}`: linearization errors at the last serious step
+* `m::R`: the parameter to test the decrease of the cost: ``f(q_{k+1}) ≤ f(p_k) + m ξ``.
+* $(_field_iterate)
+* `p_last_serious::P`: last serious iterate
+* $(_field_retr)
+* $(_field_stop)
 * `transported_subgradients`: subgradients of the bundle that are transported to `p_last_serious`
-* `vector_transport_method`: the vector transport method to use within
-* `X`: (`zero_vector(M, p)`) the current element from the possible subgradients at `p` that was last evaluated.
-* `stepsize`: ([`ConstantStepsize`](@ref)`(M)`) a [`Stepsize`](@ref)
-* `ε`: convex combination of the linearization errors
-* `λ`: convex coefficients that solve the subproblem
-* `ξ`: the stopping parameter given by ``ξ = -\lvert g\rvert^2 – ε``
-* `sub_problem`: ([`convex_bundle_method_subsolver`]) a function that solves the sub problem on `M` given the last serious iterate `p_last_serious`, the linearization errors `linearization_errors`, and the transported subgradients `transported_subgradients`,
-* `sub_state`: an [`AbstractManoptSolverState`](@ref) for the subsolver
+* $(_field_vector_transp)
+* $(_field_subgradient)
+* $(_field_step)
+* `ε::R`: convex combination of the linearization errors
+* `λ::AbstractVector{<:R}`: convex coefficients from the solution of the subproblem
+* `ξ`: the stopping parameter given by ``ξ = -\\lVert g\\rVert^2 – ε``
+* $(_field_sub_problem)
+* $(_field_sub_state)

 # Constructor

-    ConvexBundleMethodState(M::AbstractManifold, p; kwargs...)
+    ConvexBundleMethodState(M::AbstractManifold, p=rand(M); kwargs...)
+
+Generate the state for the [`convex_bundle_method`](@ref) on the manifold `M`
+with initial point `p`.
+
+# Keyword arguments
+
+Most of the following keyword arguments set default values for the fields mentioned before.
+
+* `atol_λ=eps()`
+* `atol_errors=eps()`
+* `bundle_cap=25`
+* `m=1e-2`
+* `diameter=50.0`
+* `domain=(M, p) -> isfinite(f(M, p))`
+* `k_max=0`
+* `stepsize=default_stepsize(M, ConvexBundleMethodState)`, which defaults to [`ConstantStepsize`](@ref)`(M)`.
+* $(_kw_inverse_retraction_method_default): $(_kw_inverse_retraction_method)
+* $(_kw_retraction_method_default): $(_kw_retraction_method)
+* `stopping_criterion=`[`StopWhenLagrangeMultiplierLess`](@ref)`(1e-8)`$(_sc_any)[`StopAfterIteration`](@ref)`(5000)`
+* `X=`$(_link_zero_vector()) specify the type of tangent vector to use.
+* $(_kw_vector_transport_method_default): $(_kw_vector_transport_method) +* `sub_problem=`[`convex_bundle_method_subsolver`](@ref) +* `sub_state=[`AllocatingEvaluation`](@ref) -with keywords for all fields with defaults besides `p_last_serious` which obtains the same type as `p`. - You can use for example `X=` to specify the type of tangent vector to use """ mutable struct ConvexBundleMethodState{ P, @@ -84,15 +106,15 @@ mutable struct ConvexBundleMethodState{ ϱ::Nothing# deprecated function ConvexBundleMethodState( M::TM, - p::P; + p::P=rand(M); atol_λ::R=eps(), atol_errors::R=eps(), - bundle_cap::Integer=25, + bundle_cap::I=25, m::R=1e-2, diameter::R=50.0, - domain::D, + domain::D=(M, p) -> isfinite(f(M, p)), k_max=0, - stepsize::S=default_stepsize(M, SubGradientMethodState), + stepsize::S=default_stepsize(M, ConvexBundleMethodState), inverse_retraction_method::IR=default_inverse_retraction_method(M, typeof(p)), retraction_method::TR=default_retraction_method(M, typeof(p)), stopping_criterion::SC=StopWhenLagrangeMultiplierLess(1e-8) | @@ -110,6 +132,7 @@ mutable struct ConvexBundleMethodState{ P, T, Pr, + I, TM<:AbstractManifold, TR<:AbstractRetractionMethod, SC<:StoppingCriterion, @@ -133,12 +156,12 @@ mutable struct ConvexBundleMethodState{ T, Pr, typeof(sub_state_storage), - typeof(m), + R, typeof(linearization_errors), typeof(bundle), typeof(transported_subgradients), - typeof(domain), - typeof(bundle_cap), + D, + I, IR, TR, S, @@ -180,7 +203,9 @@ function set_iterate!(bms::ConvexBundleMethodState, M, p) return bms end get_subgradient(bms::ConvexBundleMethodState) = bms.g - +function default_stepsize(M::AbstractManifold, ::Type{ConvexBundleMethodState}) + return ConstantStepsize(M) +end function show(io::IO, cbms::ConvexBundleMethodState) i = get_count(cbms, :Iterations) Iter = (i > 0) ? 
"After $i iterations\n" : "" @@ -238,17 +263,20 @@ function (dbt::DomainBackTrackingStepsize)( return t end -@doc raw""" - convex_bundle_method(M, f, ∂f, p) - -perform a convex bundle method ``p_{j+1} = \mathrm{retr}(p_k, -g_k)``, where ``\mathrm{retr}`` -is a retraction and - +_doc_cbm_gk = raw""" ```math g_k = \sum_{j\in J_k} λ_j^k \mathrm{P}_{p_k←q_j}X_{q_j}, ``` +""" +_doc_convex_bundle_method = """ + convex_bundle_method(M, f, ∂f, p) + convex_bundle_method!(M, f, ∂f, p) -``p_k`` is the last serious iterate, ``X_{q_j} ∈ ∂f(q_j)``, and the ``λ_j^k`` are solutions +perform a convex bundle method ``p^{(k+1)} = $(_l_retr)_{p^{(k)}}(-g_k)`` where + +$(_doc_cbm_gk) + +and ``p_k`` is the last serious iterate, ``X_{q_j} ∈ ∂f(q_j)``, and the ``λ_j^k`` are solutions to the quadratic subproblem provided by the [`convex_bundle_method_subsolver`](@ref). Though the subdifferential might be set valued, the argument `∂f` should always @@ -258,57 +286,45 @@ For more details, see [BergmannHerzogJasa:2024](@cite). # Input -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function ``f:\mathcal M→ℝ`` to minimize -* `∂f`: the subgradient ``∂f: \mathcal M → T\mathcal M`` of f - restricted to always only returning one value/element from the subdifferential. - This function can be passed as an allocation function `(M, p) -> X` or - a mutating function `(M, X, p) -> X`, see `evaluation`. -* `p`: (`rand(M)`) an initial value ``p_0 ∈ \mathcal M`` - -# Optional - -* `atol_λ`: (`eps()`) tolerance parameter for the convex coefficients in λ. -* `atol_errors`: (`eps()`) tolerance parameter for the linearization errors. -* `m`: (`1e-3`) the parameter to test the decrease of the cost: ``f(q_{k+1}) \le f(p_k) + m \xi``. -* `diameter`: (`50.0`) estimate for the diameter of the level set of the objective function at the starting point. 
-* `domain`: (`(M, p) -> isfinite(f(M, p))`) a function to that evaluates to true when the current candidate is in the domain of the objective `f`, and false otherwise, for example domain = (M, p) -> p ∈ dom f(M, p) ? true : false.
-* `k_max`: upper bound on the sectional curvature of the manifold.
-* `evaluation`: ([`AllocatingEvaluation`](@ref)) specify whether the subgradient works by
-  allocation (default) form `∂f(M, q)` or [`InplaceEvaluation`](@ref) in place, that is of the form `∂f!(M, X, p)`.
-* `inverse_retraction_method`: (`default_inverse_retraction_method(M, typeof(p))`) an inverse retraction method to use
-* `retraction_method`: (`default_retraction_method(M, typeof(p))`) a `retraction(M, p, X)` to use.
-* `stopping_criterion`: ([`StopWhenLagrangeMultiplierLess`](@ref)`(1e-8; names=["-ξ"])`) a functor, see [`StoppingCriterion`](@ref), indicating when to stop
-* `vector_transport_method`: (`default_vector_transport_method(M, typeof(p))`) a vector transport method to use
-* `sub_problem`: a function evaluating with new allocations that solves the sub problem on `M` given the last serious iterate `p_last_serious`, the linearization errors `linearization_errors`, and the transported subgradients `transported_subgradients`
-
-# Output
-
-the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details
+* $(_arg_M)
+* $(_arg_f)
+* $(_arg_subgrad_f)
+* $(_arg_p)
+
+# Keyword arguments
+
+* `atol_λ=eps()`: tolerance parameter for the convex coefficients in ``λ``.
+* `atol_errors=eps()`: tolerance parameter for the linearization errors.
+* `bundle_cap=25`
+* `m=1e-3`: the parameter to test the decrease of the cost: ``f(q_{k+1}) ≤ f(p_k) + m ξ``.
+* `diameter=50.0`: estimate for the diameter of the level set of the objective function at the starting point.
+* `domain=(M, p) -> isfinite(f(M, p))`: a function that evaluates to true when the current candidate is in the domain of the objective `f`, and false otherwise.
+* $(_kw_evaluation_default): $(_kw_evaluation)
+* `k_max=0`: upper bound on the sectional curvature of the manifold.
+* `stepsize=default_stepsize(M, ConvexBundleMethodState)`, which defaults to [`ConstantStepsize`](@ref)`(M)`.
+* $(_kw_inverse_retraction_method_default): $(_kw_inverse_retraction_method)
+* $(_kw_retraction_method_default): $(_kw_retraction_method)
+* `stopping_criterion=`[`StopWhenLagrangeMultiplierLess`](@ref)`(1e-8)`$(_sc_any)[`StopAfterIteration`](@ref)`(5000)`:
+  $(_kw_stopping_criterion)
+* `X=`$(_link_zero_vector()): specify the type of tangent vector to use.
+* $(_kw_vector_transport_method_default): $(_kw_vector_transport_method)
+* `sub_problem=`[`convex_bundle_method_subsolver`](@ref): a Manopt problem or a closed form solution as a function for the sub problem
+* `sub_state=`[`AllocatingEvaluation`](@ref): specify a solver for the sub problem or how the closed form solution function is evaluated.
+
+$(_kw_others)
+
+$(_doc_sec_output)
 """
+
+@doc "$(_doc_convex_bundle_method)"
 function convex_bundle_method(
     M::AbstractManifold, f::TF, ∂f::TdF, p=rand(M); kwargs...
 ) where {TF,TdF}
     p_star = copy(M, p)
     return convex_bundle_method!(M, f, ∂f, p_star; kwargs...)
 end
-@doc raw"""
-    convex_bundle_method!(M, f, ∂f, p)
-perform a bundle method ``p_{j+1} = \mathrm{retr}(p_k, -g_k)`` in place of `p`.
-
-# Input
-
-* `M`: a manifold ``\mathcal M``
-* `f`: a cost function ``f:\mathcal M→ℝ`` to minimize
-* `∂f`: the (sub)gradient ``∂f:\mathcal M→ T\mathcal M`` of F
-  restricted to always only returning one value/element from the subdifferential.
-  This function can be passed as an allocation function `(M, p) -> X` or
-  a mutating function `(M, X, p) -> X`, see `evaluation`.
-* `p`: an initial value ``p_0=p ∈ \mathcal M``
-
-for more details and all optional parameters, see [`convex_bundle_method`](@ref).
-""" +@doc "$(_doc_convex_bundle_method)" function convex_bundle_method!( M::AbstractManifold, f::TF, @@ -383,7 +399,7 @@ function initialize_solver!( push!(bms.transported_subgradients, zero_vector(M, bms.p)) return bms end -function step_solver!(mp::AbstractManoptProblem, bms::ConvexBundleMethodState, i) +function step_solver!(mp::AbstractManoptProblem, bms::ConvexBundleMethodState, k) M = get_manifold(mp) # Refactor to in-place for (j, (qj, Xj)) in enumerate(bms.bundle) @@ -400,7 +416,7 @@ function step_solver!(mp::AbstractManoptProblem, bms::ConvexBundleMethodState, i bms.g .= sum(bms.λ .* bms.transported_subgradients) bms.ε = sum(bms.λ .* bms.linearization_errors) bms.ξ = (-norm(M, bms.p_last_serious, bms.g)^2) - (bms.ε) - step = get_stepsize(mp, bms, i) + step = get_stepsize(mp, bms, k) retract!(M, bms.p, bms.p_last_serious, -step * bms.g, bms.retraction_method) bms.last_stepsize = step get_subgradient!(mp, bms.X, bms.p) @@ -452,7 +468,7 @@ function step_solver!(mp::AbstractManoptProblem, bms::ConvexBundleMethodState, i return bms end get_solver_result(bms::ConvexBundleMethodState) = bms.p_last_serious -function get_last_stepsize(::AbstractManoptProblem, bms::ConvexBundleMethodState, i) +function get_last_stepsize(::AbstractManoptProblem, bms::ConvexBundleMethodState, kw) return bms.last_stepsize end @@ -482,38 +498,38 @@ end # # Lagrange stopping criterion function (sc::StopWhenLagrangeMultiplierLess)( - mp::AbstractManoptProblem, bms::ConvexBundleMethodState, i::Int + mp::AbstractManoptProblem, bms::ConvexBundleMethodState, k::Int ) - if i == 0 # reset on init + if k == 0 # reset on init sc.at_iteration = -1 end M = get_manifold(mp) - if (sc.mode == :estimate) && (-bms.ξ ≤ sc.tolerances[1]) && (i > 0) + if (sc.mode == :estimate) && (-bms.ξ ≤ sc.tolerances[1]) && (k > 0) sc.values[1] = -bms.ξ - sc.at_iteration = i + sc.at_iteration = k return true end ng = norm(M, bms.p_last_serious, bms.g) if (sc.mode == :both) && (bms.ε ≤ sc.tolerances[1]) && (ng ≤ 
sc.tolerances[2]) && - (i > 0) + (k > 0) sc.values[1] = bms.ε sc.values[2] = ng - sc.at_iteration = i + sc.at_iteration = k return true end return false end function (d::DebugWarnIfLagrangeMultiplierIncreases)( - ::AbstractManoptProblem, st::ConvexBundleMethodState, i::Int + ::AbstractManoptProblem, st::ConvexBundleMethodState, k::Int ) - (i < 1) && (return nothing) + (k < 1) && (return nothing) if d.status !== :No new_value = -st.ξ if new_value ≥ d.old_value * d.tol @warn """The Lagrange multiplier increased by at least $(d.tol). - At iteration #$i the negative of the Lagrange multiplier, -ξ, increased from $(d.old_value) to $(new_value).\n + At iteration #$k the negative of the Lagrange multiplier, -ξ, increased from $(d.old_value) to $(new_value).\n Consider decreasing either the `diameter` keyword argument, or one of the parameters involved in the estimation of the sectional curvature, such as `k_max` in the `convex_bundle_method` call. @@ -525,7 +541,7 @@ function (d::DebugWarnIfLagrangeMultiplierIncreases)( end elseif new_value < zero(number_eltype(st.ξ)) @warn """The Lagrange multiplier is positive. - At iteration #$i the negative of the Lagrange multiplier, -ξ, became negative.\n + At iteration #$k the negative of the Lagrange multiplier, -ξ, became negative.\n Consider increasing either the `diameter` keyword argument, or changing one of the parameters involved in the estimation of the sectional curvature, such as `k_max` in the `convex_bundle_method` call. 
@@ -539,9 +555,9 @@ function (d::DebugWarnIfLagrangeMultiplierIncreases)( end function (d::DebugStepsize)( - dmp::P, bms::ConvexBundleMethodState, i::Int + dmp::P, bms::ConvexBundleMethodState, k::Int ) where {P<:AbstractManoptProblem} - (i < 1) && return nothing - Printf.format(d.io, Printf.Format(d.format), get_last_stepsize(dmp, bms, i)) + (k < 1) && return nothing + Printf.format(d.io, Printf.Format(d.format), get_last_stepsize(dmp, bms, k)) return nothing end diff --git a/src/solvers/cyclic_proximal_point.jl b/src/solvers/cyclic_proximal_point.jl index 1b486d6341..ce8e2740d6 100644 --- a/src/solvers/cyclic_proximal_point.jl +++ b/src/solvers/cyclic_proximal_point.jl @@ -15,39 +15,37 @@ function show(io::IO, cpps::CyclicProximalPointState) This indicates convergence: $Conv""" return print(io, s) end -@doc raw""" - cyclic_proximal_point(M, f, proxes_f, p) - cyclic_proximal_point(M, mpo, p) +_doc_CPPA = """ + cyclic_proximal_point(M, f, proxes_f, p; kwargs...) + cyclic_proximal_point(M, mpo, p; kwargs...) + cyclic_proximal_point!(M, f, proxes_f, p; kwargs...) + cyclic_proximal_point!(M, mpo, p; kwargs...) -perform a cyclic proximal point algorithm. +perform a cyclic proximal point algorithm. This can be done in-place of `p`. # Input -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function ``f:\mathcal M→ℝ`` to minimize +* $(_arg_M) +* `f`: a cost function ``f: $(_l_M) M→ℝ`` to minimize * `proxes_f`: an Array of proximal maps (`Function`s) `(M,λ,p) -> q` or `(M, q, λ, p) -> q` for the summands of ``f`` (see `evaluation`) -* `p`: an initial value ``p ∈ \mathcal M`` +* $(_arg_p) where `f` and the proximal maps `proxes_f` can also be given directly as a [`ManifoldProximalMapObjective`](@ref) `mpo` -# Optional +# Keyword arguments -* `evaluation`: ([`AllocatingEvaluation`](@ref)) specify whether the proximal maps work by allocation (default) form `prox(M, λ, x)` - or [`InplaceEvaluation`](@ref) in place of form `prox!(M, y, λ, x)`. 
-* `evaluation_order`: (`:Linear`) whether to use a randomly permuted sequence (`:FixedRandom`),
+* $(_kw_evaluation_default): $(_kw_evaluation)
+* `evaluation_order=:Linear`: whether to use a randomly permuted sequence (`:FixedRandom`),
   a per cycle permuted sequence (`:Random`) or the default linear one.
-* `λ`: (`iter -> 1/iter` ) a function returning the (square summable but
-  not summable) sequence of ``λ_i``
-* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(5000) | `[`StopWhenChangeLess`](@ref)`(1e-12)`) a [`StoppingCriterion`](@ref).
+* `λ=iter -> 1/iter`: a function returning the (square summable but not summable) sequence of ``λ_i``
+* `stopping_criterion=`[`StopAfterIteration`](@ref)`(5000)`$(_sc_any)[`StopWhenChangeLess`](@ref)`(1e-12)`: $(_kw_stopping_criterion)
 
-All other keyword arguments are passed to [`decorate_state!`](@ref) for decorators or
-[`decorate_objective!`](@ref), respectively.
-If you provide the [`ManifoldProximalMapObjective`](@ref) directly, these decorations can still be specified.
+$(_kw_others)
 
-# Output
-
-the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details
+$(_doc_sec_output)
 """
+
+@doc "$(_doc_CPPA)"
 cyclic_proximal_point(M::AbstractManifold, args...; kwargs...)
 function cyclic_proximal_point(
     M::AbstractManifold,
@@ -86,23 +84,7 @@ function cyclic_proximal_point(
     return cyclic_proximal_point!(M, mpo, q; kwargs...)
 end
-@doc raw"""
-    cyclic_proximal_point!(M, F, proxes, p)
-    cyclic_proximal_point!(M, mpo, p)
-
-perform a cyclic proximal point algorithm in place of `p`.
- -# Input - -* `M`: a manifold ``\mathcal M`` -* `F`: a cost function ``F:\mathcal M→ℝ`` to minimize -* `proxes`: an Array of proximal maps (`Function`s) `(M, λ, p) -> q` or `(M, q, λ, p)` for the summands of ``F`` -* `p`: an initial value ``p ∈ \mathcal M`` - -where `f` and the proximal maps `proxes_f` can also be given directly as a [`ManifoldProximalMapObjective`](@ref) `mpo` - -for all options, see [`cyclic_proximal_point`](@ref). -""" +@doc "$(_doc_CPPA)" cyclic_proximal_point!(M::AbstractManifold, args...; kwargs...) function cyclic_proximal_point!( M::AbstractManifold, @@ -140,8 +122,8 @@ function initialize_solver!(amp::AbstractManoptProblem, cpps::CyclicProximalPoin (cpps.order_type == :FixedRandom) && shuffle!(cpps.order) return cpps end -function step_solver!(amp::AbstractManoptProblem, cpps::CyclicProximalPointState, i) - λi = cpps.λ(i) +function step_solver!(amp::AbstractManoptProblem, cpps::CyclicProximalPointState, k) + λi = cpps.λ(k) for k in cpps.order get_proximal_map!(amp, cpps.p, λi, cpps.p, k) end diff --git a/src/solvers/debug_solver.jl b/src/solvers/debug_solver.jl index 5bfa0a84e5..ced0849e89 100644 --- a/src/solvers/debug_solver.jl +++ b/src/solvers/debug_solver.jl @@ -9,35 +9,39 @@ function initialize_solver!(amp::AbstractManoptProblem, dss::DebugSolverState) initialize_solver!(amp, dss.state) # Call Start get(dss.debugDictionary, :Start, DebugDivider(""))(amp, get_state(dss), 0) - # Reset others in the order as they appear later. 
- for key in [:BeforeIteration, :Iteration, :Stop] + # Reset / Init (maybe with print at 0) (before) Iteration + for key in [:BeforeIteration, :Iteration] get(dss.debugDictionary, key, DebugDivider(""))(amp, get_state(dss), 0) end + # (just) reset Stop (do not print here) + for key in [:Stop] + get(dss.debugDictionary, key, DebugDivider(""))(amp, get_state(dss), -1) + end return dss end """ - step_solver!(amp::AbstractManoptProblem, dss::DebugSolverState, i) + step_solver!(amp::AbstractManoptProblem, dss::DebugSolverState, k) Extend the `i`th step of the solver by a hook to run debug prints, that were added to the `:BeforeIteration` and `:Iteration` entries of the debug lists. """ -function step_solver!(amp::AbstractManoptProblem, dss::DebugSolverState, i) - get(dss.debugDictionary, :BeforeIteration, DebugDivider(""))(amp, get_state(dss), i) - step_solver!(amp, dss.state, i) - get(dss.debugDictionary, :Iteration, DebugDivider(""))(amp, get_state(dss), i) +function step_solver!(amp::AbstractManoptProblem, dss::DebugSolverState, k) + get(dss.debugDictionary, :BeforeIteration, DebugDivider(""))(amp, get_state(dss), k) + step_solver!(amp, dss.state, k) + get(dss.debugDictionary, :Iteration, DebugDivider(""))(amp, get_state(dss), k) return dss end """ - stop_solver!(amp::AbstractManoptProblem, dss::DebugSolverState, i) + stop_solver!(amp::AbstractManoptProblem, dss::DebugSolverState, k) Extend the `stop_solver!`, whether to stop the solver by a hook to run debug, that were added to the `:Stop` entry of the debug lists. 
""" -function stop_solver!(amp::AbstractManoptProblem, dss::DebugSolverState, i::Int) - stop = stop_solver!(amp, dss.state, i) +function stop_solver!(amp::AbstractManoptProblem, dss::DebugSolverState, k::Int) + stop = stop_solver!(amp, dss.state, k) if stop - get(dss.debugDictionary, :Stop, DebugDivider(""))(amp, get_state(dss), i) + get(dss.debugDictionary, :Stop, DebugDivider(""))(amp, get_state(dss), k) end return stop end diff --git a/src/solvers/difference-of-convex-proximal-point.jl b/src/solvers/difference-of-convex-proximal-point.jl index 66fe545977..be5893e867 100644 --- a/src/solvers/difference-of-convex-proximal-point.jl +++ b/src/solvers/difference-of-convex-proximal-point.jl @@ -1,19 +1,20 @@ -@doc raw""" - DifferenceOfConvexProximalState{Type} <: AbstractSubProblemSolverState +@doc """ + DifferenceOfConvexProximalState{P, T, Pr, St, S<:Stepsize, SC<:StoppingCriterion, RTR<:AbstractRetractionMethod, ITR<:AbstractInverseRetractionMethod} + <: AbstractSubProblemSolverState A struct to store the current state of the algorithm as well as the form. It comes in two forms, depending on the realisation of the `subproblem`. # Fields -* `inverse_retraction_method`: (`default_inverse_retraction_method(M)`) an inverse retraction method to use within Frank Wolfe. 
-* `retraction_method`: (`default_retraction_method(M)`) a type of retraction +* $(_field_inv_retr) +* $(_field_retr) * `p`, `q`, `r`: the current iterate, the gradient step and the prox, respectively their type is set by initializing `p` -* `stepsize`: ([`ConstantStepsize`](@ref)`(1.0)`) a [`Stepsize`](@ref) function to run the modified algorithm (experimental) -* `stop`: ([`StopWhenChangeLess`](@ref)`(1e-8)`) a [`StoppingCriterion`](@ref) -* `X`, `Y`: (`zero_vector(M,p)`) the current gradient and descent direction, respectively +* $(_field_step) +* $(_field_stop) +* `X`, `Y`: the current gradient and descent direction, respectively their common type is set by the keyword `X` * `sub_problem`: an [`AbstractManoptProblem`](@ref) problem or a function `(M, p, X) -> q` or `(M, q, p, X)` for the a closed form solution of the sub problem * `sub_state`: an [`AbstractManoptSolverState`](@ref) for the subsolver or an [`AbstractEvaluationType`](@ref) in case the sub problem is provided as a function @@ -22,10 +23,13 @@ It comes in two forms, depending on the realisation of the `subproblem`. DifferenceOfConvexProximalState(M, p; kwargs...) 
-## Keyword arguments
+# Keyword arguments
 
-* `X`, `retraction_method`, `inverse_retraction_method`, `stepsize` for the corresponding fields
-* `stoppping_criterion` for the [`StoppingCriterion`](@ref)
+* $(_kw_inverse_retraction_method_default): $(_kw_inverse_retraction_method)
+* $(_kw_retraction_method_default): $(_kw_retraction_method)
+* `stepsize=`[`ConstantStepsize`](@ref)`(M)`: $(_kw_stepsize)
+* `stopping_criterion=`[`StopWhenChangeLess`](@ref)`(1e-8)`: $(_kw_stopping_criterion)
+* $(_kw_X_default): $(_kw_X)
 """
 mutable struct DifferenceOfConvexProximalState{
     P,
@@ -58,7 +62,7 @@ mutable struct DifferenceOfConvexProximalState{
         stepsize::S=ConstantStepsize(M),
         stopping_criterion::SC=StopWhenChangeLess(1e-8),
         inverse_retraction_method::I=default_inverse_retraction_method(M),
-        retraction_method::R=default_retraction_method(M),
+        retraction_method::R=default_retraction_method(M, typeof(p)),
         λ::Fλ=i -> 1,
     ) where {
         P,
@@ -133,90 +137,86 @@ end
 #
 # Prox approach
 #
-@doc raw"""
+_doc_DCPPA = """
     difference_of_convex_proximal_point(M, grad_h, p=rand(M); kwargs...)
     difference_of_convex_proximal_point(M, mdcpo, p=rand(M); kwargs...)
+    difference_of_convex_proximal_point!(M, grad_h, p; kwargs...)
+    difference_of_convex_proximal_point!(M, mdcpo, p; kwargs...)
Compute the difference of convex proximal point algorithm [SouzaOliveira:2015](@cite) to minimize ```math - \operatorname*{arg\,min}_{p∈\mathcal M} g(p) - h(p) + $(_l_argmin)_{p∈$(_l_M)} g(p) - h(p) ``` -where you have to provide the (sub) gradient ``∂h`` of ``h`` and either -* the proximal map ``\operatorname{prox}_{\lambda g}`` of `g` as a function `prox_g(M, λ, p)` or `prox_g(M, q, λ, p)` +where you have to provide the subgradient ``∂h`` of ``h`` and either +* the proximal map ``$(_l_prox)_{λg}`` of `g` as a function `prox_g(M, λ, p)` or `prox_g(M, q, λ, p)` * the functions `g` and `grad_g` to compute the proximal map using a sub solver -* your own sub-solver, see optional keywords below - +* your own sub-solver, specified by `sub_problem=`and `sub_state=` This algorithm performs the following steps given a start point `p`= ``p^{(0)}``. -Then repeat for ``k=0,1,\ldots`` +Then repeat for ``k=0,1,…`` -1. ``X^{(k)} ∈ \operatorname{grad} h(p^{(k)})`` -2. ``q^{(k)} = \operatorname{retr}_{p^{(k)}}(λ_kX^{(k)})`` -3. ``r^{(k)} = \operatorname{prox}_{λ_kg}(q^{(k)})`` -4. ``X^{(k)} = \operatorname{retr}^{-1}_{p^{(k)}}(r^{(k)})`` +1. ``X^{(k)} ∈ $(_l_grad) h(p^{(k)})`` +2. ``q^{(k)} = $(_l_retr)_{p^{(k)}}(λ_kX^{(k)})`` +3. ``r^{(k)} = $(_l_prox)_{λ_kg}(q^{(k)})`` +4. ``X^{(k)} = $(_l_retr)^{-1}_{p^{(k)}}(r^{(k)})`` 5. Compute a stepsize ``s_k`` and -6. set ``p^{(k+1)} = \operatorname{retr}_{p^{(k)}}(s_kX^{(k)})``. +6. set ``p^{(k+1)} = $(_l_retr)_{p^{(k)}}(s_kX^{(k)})``. until the `stopping_criterion` is fulfilled. + See [AlmeidaNetoOliveiraSouza:2020](@cite) for more details on the modified variant, where steps 4-6 are slightly changed, since here the classical proximal point method for DC functions is obtained for ``s_k = 1`` and one can hence employ usual line search method. 
-# Optional parameters +# Keyword arguments -* `λ`: ( `i -> 1/2` ) a function returning the sequence of prox parameters λi -* `evaluation`: ([`AllocatingEvaluation`](@ref)) specify whether the gradient - works by allocation (default) form `gradF(M, x)` or [`InplaceEvaluation`](@ref) in place of the form `gradF!(M, X, x)`. -* `cost`: (`nothing`) provide the cost `f`, for debug reasons / analysis - the default `sub_problem`. Use this if you have a more efficient version than using `g` from before. -* `gradient`: (`nothing`) specify ``\operatorname{grad} f``, for debug / analysis +* `λ`: ( `k -> 1/2` ) a function returning the sequence of prox parameters ``λ_k`` +* `cost=nothing`: provide the cost `f`, for debug reasons / analysis +* $(_kw_evaluation_default): $(_kw_evaluation) +* `gradient=nothing`: specify ``$(_l_grad) f``, for debug / analysis or enhancing the `stopping_criterion` -* `prox_g`: (`nothing`) specify a proximal map for the sub problem _or_ both of the following -* `g`: (`nothing`) specify the function `g`. -* `grad_g`: (`nothing`) specify the gradient of `g`. If both `g`and `grad_g` are specified, a subsolver is automatically set up. -* `inverse_retraction_method`: (`default_inverse_retraction_method(M)`) an inverse retraction method to use (see step 4). -* `retraction_method`: (`default_retraction_method(M)`) a retraction to use (see step 2) -* `stepsize`: ([`ConstantStepsize`](@ref)`(M)`) specify a [`Stepsize`](@ref) - to run the modified algorithm (experimental.) functor. -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(200) | `[`StopWhenChangeLess`](@ref)`(1e-8)`) - a [`StoppingCriterion`](@ref) for the algorithm, also includes a [`StopWhenGradientNormLess`](@ref)`(1e-8)`, when a `gradient` is provided. 
- -While there are several parameters for a sub solver, the easiest is to provide the function `g` and `grad_g`, -such that together with the mandatory function `g` a default cost and gradient can be generated and passed to -a default subsolver. Hence the easiest example call looks like - -``` -difference_of_convex_proximal_point(M, grad_h, p0; g=g, grad_g=grad_g) -``` - -# Optional parameters for the sub problem - -* `sub_cost`: ([`ProximalDCCost`](@ref)`(g, copy(M, p), λ(1))`) cost to be used within - the default `sub_problem` that is initialized as soon as `g` is provided. -* `sub_grad`: ([`ProximalDCGrad`](@ref)`(grad_g, copy(M, p), λ(1); evaluation=evaluation)` +* `prox_g=nothing`: specify a proximal map for the sub problem _or_ both of the following +* `g=nothing`: specify the function `g`. +* `grad_g=nothing`: specify the gradient of `g`. If both `g`and `grad_g` are specified, a subsolver is automatically set up. +* $(_kw_inverse_retraction_method_default); $(_kw_inverse_retraction_method) +* $(_kw_retraction_method_default); $(_kw_retraction_method) +* `stepsize=`[`ConstantStepsize`](@ref)`(M)`): $(_kw_stepsize) +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(200)`$(_sc_any)[`StopWhenChangeLess`](@ref)`(1e-8)`): + $(_kw_stopping_criterion) + A [`StopWhenGradientNormLess`](@ref)`(1e-8)` is added with $(_sc_any), when a `gradient` is provided. +* `sub_cost=`[`ProximalDCCost`](@ref)`(g, copy(M, p), λ(1))`): + cost to be used within the default `sub_problem` that is initialized as soon as `g` is provided. + $(_kw_used_in("sub_objective")) +* `sub_grad=`[`ProximalDCGrad`](@ref)`(grad_g, copy(M, p), λ(1); evaluation=evaluation)`: gradient to be used within the default `sub_problem`, that is initialized as soon as `grad_g` is provided. - This is generated by default when `grad_g` is provided. You can specify your own by overwriting this keyword. 
-* `sub_hess`: (a finite difference approximation by default) specify - a Hessian of the subproblem, which the default solver, see `sub_state` needs -* `sub_kwargs`: (`(;)`) pass keyword arguments to the `sub_state`, in form of - a `Dict(:kwname=>value)`, unless you set the `sub_state` directly. -* `sub_objective`: (a gradient or Hessian objective based on the last 3 keywords) - provide the objective used within `sub_problem` (if that is not specified by the user) -* `sub_problem`: ([`DefaultManoptProblem`](@ref)`(M, sub_objective)` specify a manopt problem for the sub-solver runs. + $(_kw_used_in("sub_objective")) +* `sub_hess`: (a finite difference approximation using `sub_grad` by default): + specify a Hessian of the `sub_cost`, which the default solver, see `sub_state=` needs. +* $(_kw_sub_kwargs_default): $(_kw_sub_kwargs) +* `sub_objective`: a gradient or Hessian objective based on `sub_cost=`, `sub_grad=`, and `sub_hess`if provided + the objective used within `sub_problem`. + $(_kw_used_in("sub_problem")) +* `sub_problem=`[`DefaultManoptProblem`](@ref)`(M, sub_objective)`: + specify a manopt problem or a function for the sub-solver runs. You can also provide a function for a closed form solution. Then `evaluation=` is taken into account for the form of this function. -* `sub_state`: ([`TrustRegionsState`](@ref)). requires the `sub_Hessian to be provided, - decorated with `sub_kwargs`) choose the solver by specifying a solver state to solve the `sub_problem` -* `sub_stopping_criterion`: ([`StopAfterIteration`](@ref)`(300) | `[`StopWhenStepsizeLess`](@ref)`(1e-9) | `[`StopWhenGradientNormLess`](@ref)`(1e-9)`) +* `sub_state`([`GradientDescentState`](@ref) or [`TrustRegionsState`](@ref) if `sub_hessian`): + the subsolver to be used when solving the sub problem. + By default this is also decorated using the `sub_kwargs`. 
+ if the `sub_problem` if a function (a closed form solution), this is set to `evaluation` + and can be changed to the evaluation type of the closed form solution accordingly. +* `sub_stopping_criterion`: ([`StopAfterIteration`](@ref)`(300)`$(_sc_any)`[`StopWhenGradientNormLess`](@ref)`(1e-8)`: a stopping criterion used withing the default `sub_state=` + $(_kw_used_in("sub_state")) -all others are passed on to decorate the inner [`DifferenceOfConvexProximalState`](@ref). +$(_kw_others) -# Output -the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details +$(_doc_sec_output) """ + +@doc "$(_doc_DCPPA)" difference_of_convex_proximal_point(M::AbstractManifold, args...; kwargs...) function difference_of_convex_proximal_point(M::AbstractManifold, grad_h; kwargs...) return difference_of_convex_proximal_point( @@ -281,23 +281,8 @@ function difference_of_convex_proximal_point( q = copy(M, p) return difference_of_convex_proximal_point!(M, mdcpo, q; kwargs...) end -@doc raw""" - difference_of_convex_proximal_point!(M, grad_h, p; cost=nothing, kwargs...) - difference_of_convex_proximal_point!(M, mdcpo, p; cost=nothing, kwargs...) - difference_of_convex_proximal_point!(M, mdcpo, prox_g, p; cost=nothing, kwargs...) - -Compute the difference of convex algorithm to minimize - -```math - \operatorname*{arg\,min}_{p∈\mathcal M} g(p) - h(p) -``` - -where you have to provide the proximal map of `g` and the gradient of `h`. -The computation is done in-place of `p`. - -For all further details, especially the keyword arguments, see [`difference_of_convex_proximal_point`](@ref). -""" +@doc "$(_doc_DCPPA)" difference_of_convex_proximal_point!(M::AbstractManifold, args...; kwargs...) 
function difference_of_convex_proximal_point!( M::AbstractManifold, @@ -327,7 +312,7 @@ function difference_of_convex_proximal_point!( evaluation::AbstractEvaluationType=AllocatingEvaluation(), inverse_retraction_method=default_inverse_retraction_method(M), objective_type=:Riemannian, - retraction_method=default_retraction_method(M), + retraction_method=default_retraction_method(M, typeof(p)), stepsize=ConstantStepsize(M), stopping_criterion=if isnothing(get_gradient_function(mdcpo)) StopAfterIteration(300) | StopWhenChangeLess(1e-9) @@ -441,15 +426,15 @@ function step_solver!( dcps::DifferenceOfConvexProximalState{ P,T,<:Function,ClosedFormSubSolverState{AllocatingEvaluation} }, - i, + k, ) where {P,T} M = get_manifold(amp) # each line is one step in the documented solver steps. Note the reuse of `dcps.X` get_subtrahend_gradient!(amp, dcps.X, dcps.p) - retract!(M, dcps.q, dcps.p, dcps.λ(i) * dcps.X, dcps.retraction_method) - copyto!(M, dcps.r, dcps.sub_problem(M, dcps.λ(i), dcps.q)) + retract!(M, dcps.q, dcps.p, dcps.λ(k) * dcps.X, dcps.retraction_method) + copyto!(M, dcps.r, dcps.sub_problem(M, dcps.λ(k), dcps.q)) inverse_retract!(M, dcps.X, dcps.p, dcps.r, dcps.inverse_retraction_method) - s = dcps.stepsize(amp, dcps, i) + s = dcps.stepsize(amp, dcps, k) retract!(M, dcps.p, dcps.p, s * dcps.X, dcps.retraction_method) return dcps end @@ -462,15 +447,15 @@ function step_solver!( dcps::DifferenceOfConvexProximalState{ P,T,<:Function,ClosedFormSubSolverState{InplaceEvaluation} }, - i, + k, ) where {P,T} M = get_manifold(amp) # each line is one step in the documented solver steps. 
Note the reuse of `dcps.X` get_subtrahend_gradient!(amp, dcps.X, dcps.p) - retract!(M, dcps.q, dcps.p, dcps.λ(i) * dcps.X, dcps.retraction_method) - dcps.sub_problem(M, dcps.r, dcps.λ(i), dcps.q) + retract!(M, dcps.q, dcps.p, dcps.λ(k) * dcps.X, dcps.retraction_method) + dcps.sub_problem(M, dcps.r, dcps.λ(k), dcps.q) inverse_retract!(M, dcps.X, dcps.p, dcps.r, dcps.inverse_retraction_method) - s = dcps.stepsize(amp, dcps, i) + s = dcps.stepsize(amp, dcps, k) retract!(M, dcps.p, dcps.p, s * dcps.X, dcps.retraction_method) return dcps end @@ -482,25 +467,25 @@ function step_solver!( dcps::DifferenceOfConvexProximalState{ P,T,<:AbstractManoptProblem,<:AbstractManoptSolverState }, - i, + k, ) where {P,T} M = get_manifold(amp) # Evaluate gradient of h into X get_subtrahend_gradient!(amp, dcps.X, dcps.p) # do a step in that direction - retract!(M, dcps.q, dcps.p, dcps.λ(i) * dcps.X, dcps.retraction_method) + retract!(M, dcps.q, dcps.p, dcps.λ(k) * dcps.X, dcps.retraction_method) # use this point (q) for the proximal map set_manopt_parameter!(dcps.sub_problem, :Objective, :Cost, :p, dcps.q) - set_manopt_parameter!(dcps.sub_problem, :Objective, :Cost, :λ, dcps.λ(i)) + set_manopt_parameter!(dcps.sub_problem, :Objective, :Cost, :λ, dcps.λ(k)) set_manopt_parameter!(dcps.sub_problem, :Objective, :Gradient, :p, dcps.q) - set_manopt_parameter!(dcps.sub_problem, :Objective, :Gradient, :λ, dcps.λ(i)) + set_manopt_parameter!(dcps.sub_problem, :Objective, :Gradient, :λ, dcps.λ(k)) set_iterate!(dcps.sub_state, M, copy(M, dcps.q)) solve!(dcps.sub_problem, dcps.sub_state) copyto!(M, dcps.r, get_solver_result(dcps.sub_state)) # use that direction inverse_retract!(M, dcps.X, dcps.p, dcps.r, dcps.inverse_retraction_method) # to determine a step size - s = dcps.stepsize(amp, dcps, i) + s = dcps.stepsize(amp, dcps, k) retract!(M, dcps.p, dcps.p, s * dcps.X, dcps.retraction_method) if !isnothing(get_gradient_function(get_objective(amp))) get_gradient!(amp, dcps.X, dcps.p) diff --git 
a/src/solvers/difference_of_convex_algorithm.jl b/src/solvers/difference_of_convex_algorithm.jl index 45ffddc888..b56bc1ef8d 100644 --- a/src/solvers/difference_of_convex_algorithm.jl +++ b/src/solvers/difference_of_convex_algorithm.jl @@ -1,5 +1,5 @@ -@doc raw""" +@doc """ DifferenceOfConvexState{Pr,St,P,T,SC<:StoppingCriterion} <: AbstractManoptSolverState @@ -8,19 +8,19 @@ It comes in two forms, depending on the realisation of the `subproblem`. # Fields -* `p` the current iterate, a point on the manifold -* `X` the current subgradient, a tangent vector to `p`. -* `sub_problem` problem for the subsolver -* `sub_state` state of the subproblem -* `stop` a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop. +* $(_field_iterate) +* $(_field_subgradient) +* $(_field_sub_problem) +* $(_field_sub_state) +* $(_field_stop) -For the sub task, a method to solve +The sub task consists of a method to solve ```math - \operatorname*{argmin}_{q∈\mathcal M}\ g(p) - ⟨X, \log_p q⟩ + $(_l_argmin)_{q∈$(_l_M)}\\ g(p) - ⟨X, $(_l_log)_p q⟩ ``` -is needed. Besides a problem and options, one can also provide a function and +is needed. Besides a problem and a state, one can also provide a function and an [`AbstractEvaluationType`](@ref), respectively, to indicate a closed form solution for the sub task. 
@@ -37,8 +37,8 @@ Here the elements passed are the current iterate `p` and the subgradient `X` of ## further keyword arguments -* `initial_vector=zero_vector` (`zero_vectoir(M,p)`) how to initialize the inner gradient tangent vector -* `stopping_criterion` a [`StopAfterIteration`](@ref)`(200)` a stopping criterion +* `initial_vector=`$(_link_zero_vector()): how to initialize the inner gradient tangent vector +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(200)`: a stopping criterion """ mutable struct DifferenceOfConvexState{ Pr,St<:AbstractManoptSolverState,P,T,SC<:StoppingCriterion @@ -110,82 +110,72 @@ function show(io::IO, dcs::DifferenceOfConvexState) This indicates convergence: $Conv""" return print(io, s) end -@doc raw""" +_doc_DoC = """ difference_of_convex_algorithm(M, f, g, ∂h, p=rand(M); kwargs...) difference_of_convex_algorithm(M, mdco, p; kwargs...) + difference_of_convex_algorithm!(M, f, g, ∂h, p; kwargs...) + difference_of_convex_algorithm!(M, mdco, p; kwargs...) Compute the difference of convex algorithm [BergmannFerreiraSantosSouza:2023](@cite) to minimize ```math - \operatorname*{arg\,min}_{p∈\mathcal M}\ g(p) - h(p) + $(_l_argmin)_{p∈$(_l_M)}\\ g(p) - h(p) ``` where you need to provide ``f(p) = g(p) - h(p)``, ``g`` and the subdifferential ``∂h`` of ``h``. This algorithm performs the following steps given a start point `p`= ``p^{(0)}``. -Then repeat for ``k=0,1,\ldots`` +Then repeat for ``k=0,1,…`` 1. Take ``X^{(k)} ∈ ∂h(p^{(k)})`` 2. Set the next iterate to the solution of the subproblem ```math - p^{(k+1)} ∈ \operatorname*{argmin}_{q ∈ \mathcal M} g(q) - ⟨X^{(k)}, \log_{p^{(k)}}q⟩ + p^{(k+1)} ∈ $(_l_argmin)_{q ∈ $(_l_M)} g(q) - ⟨X^{(k)}, $(_l_log)_{p^{(k)}}q⟩ ``` -until the `stopping_criterion` is fulfilled. 
- -# Optional parameters - -* `evaluation` ([`AllocatingEvaluation`](@ref)) specify whether the gradient works by - allocation (default) form `grad_f(M, p)` or [`InplaceEvaluation`](@ref) form `grad_f!(M, X, x)` -* `gradient` (`nothing`) specify ``\operatorname{grad} f``, for debug / analysis or enhancing `stopping_criterion=` -* `grad_g` (`nothing`) specify the gradient of `g`. If specified, a subsolver is automatically set up. -* `initial_vector` (`zero_vector(M, p)`) initialise the inner tangent vector to store the subgradient result. -* `stopping_criterion` ([`StopAfterIteration`](@ref)`(200) | `[`StopWhenChangeLess`](@ref)`(1e-8)`) - a [`StoppingCriterion`](@ref) for the algorithm. This includes a [`StopWhenGradientNormLess`](@ref)`(1e-8)`, when a `gradient` is provided. - -if you specify the [`ManifoldDifferenceOfConvexObjective`](@ref) `mdco`, additionally - -* `g` - (`nothing`) specify the function `g` If specified, a subsolver is automatically set up. - - -While there are several parameters for a sub solver, the easiest is to provide the function `grad_g=`, -such that together with the mandatory function `g` a default cost and gradient can be generated and passed to -a default subsolver. Hence the easiest example call looks like - -``` -difference_of_convex_algorithm(M, f, g, grad_h, p; grad_g=grad_g) -``` +until the stopping criterion (see the `stopping_criterion` keyword) is fulfilled. -# Optional parameters for the sub problem +# Keyword arguments -* `sub_cost` ([`LinearizedDCCost`](@ref)`(g, p, initial_vector)`) - a cost to be used within the default `sub_problem` - Use this if you have a more efficient version than the default that is built using `g` from before.
-* `sub_grad` ([`LinearizedDCGrad`](@ref)`(grad_g, p, initial_vector; evaluation=evaluation)` +* $(_kw_evaluation_default): $(_kw_evaluation) +* `gradient=nothing`: specify ``$(_l_grad) f``, for debug / analysis or enhancing the `stopping_criterion=` +* `grad_g=nothing`: specify the gradient of `g`. If specified, a subsolver is automatically set up. +* `initial_vector=`$(_link_zero_vector()): initialise the inner tangent vector to store the subgradient result. +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(200)`$(_sc_any)[`StopWhenChangeLess`](@ref)`(1e-8)`: + $(_kw_stopping_criterion) +* `g=nothing`: specify the function `g` If specified, a subsolver is automatically set up. +* `sub_cost=`[`LinearizedDCCost`](@ref)`(g, p, initial_vector)`: a cost to be used within the default `sub_problem`. + $(_kw_used_in("sub_objective")) +* `sub_grad=`[`LinearizedDCGrad`](@ref)`(grad_g, p, initial_vector; evaluation=evaluation)`: gradient to be used within the default `sub_problem`. - This is generated by default when `grad_g` is provided. You can specify your own by overwriting this keyword. -* `sub_hess` (a finite difference approximation by default) specify a Hessian - of the subproblem, which the default solver, see `sub_state` needs -* `sub_kwargs` (`(;)`) pass keyword arguments to the `sub_state`, in form of - a `Dict(:kwname=>value)`, unless you set the `sub_state` directly. -* `sub_objective` (a gradient or Hessian objective based on the last 3 keywords) - provide the objective used within `sub_problem` (if that is not specified by the user) -* `sub_problem` ([`DefaultManoptProblem`](@ref)`(M, sub_objective)` specify a manopt problem for the sub-solver runs. + $(_kw_used_in("sub_objective")) +* `sub_hess`: (a finite difference approximation using `sub_grad` by default): + specify a Hessian of the `sub_cost`, which the default solver, see `sub_state=` needs. 
+ $(_kw_used_in("sub_objective")) +* $(_kw_sub_kwargs_default): $(_kw_sub_kwargs) +* `sub_objective`: a gradient or Hessian objective based on `sub_cost=`, `sub_grad=`, and `sub_hess` if provided + the objective used within `sub_problem`. + $(_kw_used_in("sub_problem")) +* `sub_problem=`[`DefaultManoptProblem`](@ref)`(M, sub_objective)`: + specify a manopt problem or a function for the sub-solver runs. You can also provide a function for a closed form solution. Then `evaluation=` is taken into account for the form of this function. -* `sub_state` ([`TrustRegionsState`](@ref) by default, requires `sub_hessian` to be provided; decorated with `sub_kwargs`). - Choose the solver by specifying a solver state to solve the `sub_problem` +* `sub_state`([`GradientDescentState`](@ref) or [`TrustRegionsState`](@ref) if `sub_hessian`): + the subsolver to be used when solving the sub problem. + By default this is also decorated using the `sub_kwargs`. if the `sub_problem` if a function (a closed form solution), this is set to `evaluation` and can be changed to the evaluation type of the closed form solution accordingly. -* `sub_stopping_criterion` ([`StopAfterIteration`](@ref)`(300) | `[`StopWhenStepsizeLess`](@ref)`(1e-9) | `[`StopWhenGradientNormLess`](@ref)`(1e-9)`) +* `sub_stopping_criterion=`[`StopAfterIteration`](@ref)`(300)`$(_sc_any)[`StopWhenStepsizeLess`](@ref)`(1e-9)`$(_sc_any)[`StopWhenGradientNormLess`](@ref)`(1e-9)`: a stopping criterion used withing the default `sub_state=` -* `sub_stepsize` ([`ArmijoLinesearch`](@ref)`(M)`) specify a step size used within the `sub_state`, - -all others are passed on to decorate the inner [`DifferenceOfConvexState`](@ref). + $(_kw_used_in("sub_state")) +* `sub_stepsize=`[`ArmijoLinesearch`](@ref)`(M)`: specify a step size used within the `sub_state`.
+ $(_kw_used_in("sub_state")) -# Output +$(_kw_others) -the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details +$(_doc_sec_output) """ + +@doc "$(_doc_DoC)" difference_of_convex_algorithm(M::AbstractManifold, args...; kwargs...) function difference_of_convex_algorithm(M::AbstractManifold, f, g, ∂h; kwargs...) return difference_of_convex_algorithm(M::AbstractManifold, f, g, ∂h, rand(M); kwargs...) @@ -244,16 +234,7 @@ function difference_of_convex_algorithm( return difference_of_convex_algorithm!(M, mdco, q; kwargs...) end -@doc raw""" - difference_of_convex_algorithm!(M, f, g, ∂h, p; kwargs...) - difference_of_convex_algorithm!(M, mdco, p; kwargs...) - -Run the difference of convex algorithm and perform the steps in place of `p`. -See [`difference_of_convex_algorithm`](@ref) for more details. - -if you specify the [`ManifoldDifferenceOfConvexObjective`](@ref) `mdco`, -the `g` is a keyword argument. -""" +@doc "$(_doc_DoC)" difference_of_convex_algorithm!(M::AbstractManifold, args...; kwargs...) function difference_of_convex_algorithm!( M::AbstractManifold, @@ -387,7 +368,7 @@ end function initialize_solver!(::AbstractManoptProblem, dcs::DifferenceOfConvexState) return dcs end -function step_solver!(amp::AbstractManoptProblem, dcs::DifferenceOfConvexState, i) +function step_solver!(amp::AbstractManoptProblem, dcs::DifferenceOfConvexState, kw) M = get_manifold(amp) get_subtrahend_gradient!(amp, dcs.X, dcs.p) set_manopt_parameter!(dcs.sub_problem, :Objective, :Cost, :p, dcs.p) diff --git a/src/solvers/exact_penalty_method.jl b/src/solvers/exact_penalty_method.jl index fca9cabfce..92cf0a914e 100644 --- a/src/solvers/exact_penalty_method.jl +++ b/src/solvers/exact_penalty_method.jl @@ -1,27 +1,43 @@ -@doc raw""" +@doc """ ExactPenaltyMethodState{P,T} <: AbstractManoptSolverState Describes the exact penalty method, with # Fields -a default value is given in brackets if a parameter can be left out in initialization. 
- -* `p`: a set point on a manifold as starting point -* `sub_problem`: an [`AbstractManoptProblem`](@ref) problem for the subsolver -* `sub_state`: an [`AbstractManoptSolverState`](@ref) for the subsolver -* `ϵ`: (`1e–3`) the accuracy tolerance -* `ϵ_min`: (`1e-6`) the lower bound for the accuracy tolerance -* `u`: (`1e–1`) the smoothing parameter and threshold for violation of the constraints -* `u_min`: (`1e-6`) the lower bound for the smoothing parameter and threshold for violation of the constraints -* `ρ`: (`1.0`) the penalty parameter -* `θ_ρ`: (`0.3`) the scaling factor of the penalty parameter -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(300) | (`[`StopWhenSmallerOrEqual`](@ref)`(ϵ, ϵ_min) & `[`StopWhenChangeLess`](@ref)`(min_stepsize))`) a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop. + +* `ϵ`: the accuracy tolerance +* `ϵ_min`: the lower bound for the accuracy tolerance +* $(_field_p) +* `ρ`: the penalty parameter +* $(_field_sub_problem) +* $(_field_sub_state) +* $(_field_stop) +* `u`: the smoothing parameter and threshold for violation of the constraints +* `u_min`: the lower bound for the smoothing parameter and threshold for violation of the constraints +* `θ_ϵ`: the scaling factor of the tolerance parameter +* `θ_ρ`: the scaling factor of the penalty parameter +* `θ_u`: the scaling factor of the smoothing parameter # Constructor ExactPenaltyMethodState(M::AbstractManifold, p, sub_problem, sub_state; kwargs...) -construct an exact penalty options with the remaining previously mentioned fields as keywords using their provided defaults. +construct an exact penalty state. + +# Keyword arguments + +* `ϵ=1e-3` +* `ϵ_min=1e-6` +* `ϵ_exponent=1 / 100`: a shortcut for the scaling factor ``θ_ϵ`` +* `θ_ϵ=(ϵ_min / ϵ)^(ϵ_exponent)` +* `u=1e-1` +* `u_min=1e-6` +* `u_exponent=1 / 100`: a shortcut for the scaling factor ``θ_u``. 
+* `θ_u=(u_min / u)^(u_exponent)` +* `ρ=1.0` +* `θ_ρ=0.3` +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(300)`$(_sc_any)` (` + [`StopWhenSmallerOrEqual`](@ref)`(:ϵ, ϵ_min)`$(_sc_any)[`StopWhenChangeLess`](@ref)`(1e-10) )` # See also @@ -110,51 +126,22 @@ function show(io::IO, epms::ExactPenaltyMethodState) return print(io, s) end -@doc raw""" - exact_penalty_method(M, F, gradF, p=rand(M); kwargs...) - exact_penalty_method(M, cmo::ConstrainedManifoldObjective, p=rand(M); kwargs...) - -perform the exact penalty method (EPM) [LiuBoumal:2019](@cite) -The aim of the EPM is to find a solution of the constrained optimisation task - -```math -\begin{aligned} -\min_{p ∈\mathcal{M}} &f(p)\\ -\text{subject to } &g_i(p)\leq 0 \quad \text{ for } i= 1, …, m,\\ -\quad &h_j(p)=0 \quad \text{ for } j=1,…,n, -\end{aligned} -``` - -where `M` is a Riemannian manifold, and ``f``, ``\{g_i\}_{i=1}^m`` and ``\{h_j\}_{j=1}^n`` -are twice continuously differentiable functions from `M` to ℝ. -For that a weighted ``L_1``-penalty term for the violation of the constraints is added to the objective - +_doc_EPM_penalty = raw""" ```math f(x) + ρ\biggl( \sum_{i=1}^m \max\bigl\{0, g_i(x)\bigr\} + \sum_{j=1}^n \vert h_j(x)\vert\biggr), ``` - where ``ρ>0`` is the penalty parameter. -Since this is non-smooth, a [`SmoothingTechnique`](@ref) with parameter `u` is applied, -see the [`ExactPenaltyCost`](@ref). - -In every step ``k`` of the exact penalty method, the smoothed objective is then minimized over all -``x ∈\mathcal{M}``. 
-Then, the accuracy tolerance ``ϵ`` and the smoothing parameter ``u`` are updated by setting +""" +_doc_EMP_ϵ_update = raw""" ```math ϵ^{(k)}=\max\{ϵ_{\min}, θ_ϵ ϵ^{(k-1)}\}, ``` where ``ϵ_{\min}`` is the lowest value ``ϵ`` is allowed to become and ``θ_ϵ ∈ (0,1)`` is constant scaling factor, and +""" -```math -u^{(k)} = \max \{u_{\min}, \theta_u u^{(k-1)} \}, -``` - -where ``u_{\min}`` is the lowest value ``u`` is allowed to become and ``θ_u ∈ (0,1)`` is constant scaling factor. - -Finally, the penalty parameter ``ρ`` is updated as - +_doc_EMP_ρ_update = raw""" ```math ρ^{(k)} = \begin{cases} ρ^{(k-1)}/θ_ρ, & \text{if } \displaystyle \max_{j ∈ \mathcal{E},i ∈ \mathcal{I}} \Bigl\{ \vert h_j(x^{(k)}) \vert, g_i(x^{(k)})\Bigr\} \geq u^{(k-1)} \Bigr) ,\\ @@ -163,58 +150,104 @@ Finally, the penalty parameter ``ρ`` is updated as ``` where ``θ_ρ ∈ (0,1)`` is a constant scaling factor. +""" +_doc_EMP_u_update = raw""" +```math +u^{(k)} = \max \{u_{\min}, \theta_u u^{(k-1)} \}, +``` + +where ``u_{\min}`` is the lowest value ``u`` is allowed to become and ``θ_u ∈ (0,1)`` is constant scaling factor. +""" + +_doc_EPM = """ + exact_penalty_method(M, f, grad_f, p=rand(M); kwargs...) + exact_penalty_method(M, cmo::ConstrainedManifoldObjective, p=rand(M); kwargs...) + exact_penalty_method!(M, f, grad_f, p; kwargs...) + exact_penalty_method!(M, cmo::ConstrainedManifoldObjective, p; kwargs...) + +perform the exact penalty method (EPM) [LiuBoumal:2019](@cite) +The aim of the EPM is to find a solution of the constrained optimisation task + +$(_problem_constrained) + +where `M` is a Riemannian manifold, and ``f``, ``$(_math_sequence("g", "i", "1", "n"))`` and ``$(_math_sequence("h", "j", "1", "m"))`` +are twice continuously differentiable functions from `M` to ℝ. 
+For that a weighted ``L_1``-penalty term for the violation of the constraints is added to the objective + +$(_doc_EPM_penalty) + +Since this is non-smooth, a [`SmoothingTechnique`](@ref) with parameter `u` is applied, +see the [`ExactPenaltyCost`](@ref). + +In every step ``k`` of the exact penalty method, the smoothed objective is then minimized over all ``p ∈$(_l_M)``. +Then, the accuracy tolerance ``ϵ`` and the smoothing parameter ``u`` are updated by setting + +$(_doc_EMP_ϵ_update) + +$(_doc_EMP_u_update) + +Finally, the penalty parameter ``ρ`` is updated as + +$(_doc_EMP_ρ_update) # Input -* `M` a manifold ``\mathcal M`` -* `f` a cost function ``f:\mathcal M→ℝ`` to minimize -* `grad_f` the gradient of the cost function +* $(_arg_M) +* $(_arg_f) +* $(_arg_grad_f) +* $(_arg_p) -# Optional (if not called with the [`ConstrainedManifoldObjective`](@ref) `cmo`) +# Keyword arguments + if not called with the [`ConstrainedManifoldObjective`](@ref) `cmo` -* `g`: (`nothing`) the inequality constraints -* `h`: (`nothing`) the equality constraints -* `grad_g`: (`nothing`) the gradient of the inequality constraints -* `grad_h`: (`nothing`) the gradient of the equality constraints +* `g=nothing`: the inequality constraints +* `h=nothing`: the equality constraints +* `grad_g=nothing`: the gradient of the inequality constraints +* `grad_h=nothing`: the gradient of the equality constraints Note that one of the pairs (`g`, `grad_g`) or (`h`, `grad_h`) has to be provided. -Otherwise the problem is not constrained and you should consider using unconstrained solvers like [`quasi_Newton`](@ref). 
- -# Optional - -* `smoothing`: ([`LogarithmicSumOfExponentials`](@ref)) [`SmoothingTechnique`](@ref) to use -* `ϵ`: (`1e–3`) the accuracy tolerance -* `ϵ_exponent`: (`1/100`) exponent of the ϵ update factor; -* `ϵ_min`: (`1e-6`) the lower bound for the accuracy tolerance -* `u`: (`1e–1`) the smoothing parameter and threshold for violation of the constraints -* `u_exponent`: (`1/100`) exponent of the u update factor; -* `u_min`: (`1e-6`) the lower bound for the smoothing parameter and threshold for violation of the constraints -* `ρ`: (`1.0`) the penalty parameter -* `equality_constraints`: (`nothing`) the number ``n`` of equality constraints. -* `gradient_range` (`nothing`, equivalent to [`NestedPowerRepresentation`](@extref) specify how gradients are represented -* `gradient_equality_range`: (`gradient_range`) specify how the gradients of the equality constraints are represented -* `gradient_inequality_range`: (`gradient_range`) specify how the gradients of the inequality constraints are represented -* `inequality_constraints`: (`nothing`) the number ``m`` of inequality constraints. -* `min_stepsize`: (`1e-10`) the minimal step size -* `sub_cost`: ([`ExactPenaltyCost`](@ref)`(problem, ρ, u; smoothing=smoothing)`) use this exact penalty cost, especially with the same numbers `ρ,u` as in the options for the sub problem -* `sub_grad`: ([`ExactPenaltyGrad`](@ref)`(problem, ρ, u; smoothing=smoothing)`) use this exact penalty gradient, especially with the same numbers `ρ,u` as in the options for the sub problem -* `sub_kwargs`: (`(;)`) keyword arguments to decorate the sub options, for example debug, that automatically respects the main solvers debug options (like sub-sampling) as well -* `sub_stopping_criterion`: ([`StopAfterIteration`](@ref)`(200) | `[`StopWhenGradientNormLess`](@ref)`(ϵ) | `[`StopWhenStepsizeLess`](@ref)`(1e-10)`) specify a stopping criterion for the subsolver. 
-* `sub_problem`: ([`DefaultManoptProblem`](@ref)`(M, `[`ManifoldGradientObjective`](@ref)`(sub_cost, sub_grad; evaluation=evaluation)`, provide a problem for the subsolver -* `sub_state`: ([`QuasiNewtonState`](@ref)) using [`QuasiNewtonLimitedMemoryDirectionUpdate`](@ref) with [`InverseBFGS`](@ref) and `sub_stopping_criterion` as a stopping criterion. See also `sub_kwargs`. -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(300)` | ([`StopWhenSmallerOrEqual`](@ref)`(ϵ, ϵ_min)` & [`StopWhenChangeLess`](@ref)`(1e-10)`) a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop. +Otherwise the problem is not constrained and a better solver would be for example [`quasi_Newton`](@ref). + +# Further keyword arguments + +* `ϵ=1e–3`: the accuracy tolerance +* `ϵ_exponent=1/100`: exponent of the ϵ update factor; +* `ϵ_min=1e-6`: the lower bound for the accuracy tolerance +* `u=1e–1`: the smoothing parameter and threshold for violation of the constraints +* `u_exponent=1/100`: exponent of the u update factor; +* `u_min=1e-6`: the lower bound for the smoothing parameter and threshold for violation of the constraints +* `ρ=1.0`: the penalty parameter +* `equality_constraints=nothing`: the number ``n`` of equality constraints. + If not provided, a call to the gradient of `g` is performed to estimate these. +* `gradient_range=nothing`: specify how both gradients of the constraints are represented +* `gradient_equality_range=gradient_range`: + specify how gradients of the equality constraints are represented, see [`VectorGradientFunction`](@ref). +* `gradient_inequality_range=gradient_range`: + specify how gradients of the inequality constraints are represented, see [`VectorGradientFunction`](@ref). +* `inequality_constraints=nothing`: the number ``m`` of inequality constraints. + If not provided, a call to the gradient of `g` is performed to estimate these. 
+* `min_stepsize=1e-10`: the minimal step size +* `smoothing=`[`LogarithmicSumOfExponentials`](@ref): a [`SmoothingTechnique`](@ref) to use +* `sub_cost=`[`ExactPenaltyCost`](@ref)`(problem, ρ, u; smoothing=smoothing)`: cost to use in the sub solver + $(_kw_used_in("sub_problem")) +* `sub_grad=`[`ExactPenaltyGrad`](@ref)`(problem, ρ, u; smoothing=smoothing)`: gradient to use in the sub solver + $(_kw_used_in("sub_problem")) +* $(_kw_sub_kwargs_default): $(_kw_sub_kwargs) +* `sub_stopping_criterion=`[`StopAfterIteration`](@ref)`(200)`$(_sc_any)[`StopWhenGradientNormLess`](@ref)`(ϵ)`$(_sc_any)[`StopWhenStepsizeLess`](@ref)`(1e-10)`: a stopping criterion for the sub solver + $(_kw_used_in("sub_state")) +* `sub_problem=`[`DefaultManoptProblem`](@ref)`(M, `[`ManifoldGradientObjective`](@ref)`(sub_cost, sub_grad; evaluation=evaluation)`: the problem for the subsolver. The objective can also be decorated with arguments from `sub_kwargs`. +* `sub_state=`[`QuasiNewtonState`](@ref)`(...)` a solver to use for the sub problem. By default an L-BFGS is used. +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(300)`$(_sc_any)` ( `[`StopWhenSmallerOrEqual`](@ref)`(ϵ, ϵ_min)`$(_sc_all)[`StopWhenChangeLess`](@ref)`(1e-10) )`: $(_kw_stopping_criterion) For the `range`s of the constraints' gradient, other power manifold tangent space representations, mainly the [`ArrayPowerRepresentation`](@extref Manifolds :jl:type:`Manifolds.ArrayPowerRepresentation`) can be used if the gradients can be computed more efficiently in that representation. -With `equality_constraints` and `inequality_constraints` you have to provide the dimension -of the ranges of `h` and `g`, respectively. If not provided, together with `M` and the start point `p0`, -a call to either of these is performed to try to infer these.
- -# Output +$(_kw_others) -the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details +$_doc_sec_output """ + +@doc "$(_doc_EPM)" exact_penalty_method(M::AbstractManifold, args...; kwargs...) function exact_penalty_method(M::AbstractManifold, f, grad_f; kwargs...) return exact_penalty_method(M, f, grad_f, rand(M); kwargs...) @@ -288,14 +321,7 @@ function exact_penalty_method( return exact_penalty_method!(M, cmo, q; kwargs...) end -@doc raw""" - exact_penalty_method!(M, f, grad_f, p; kwargs...) - exact_penalty_method!(M, cmo::ConstrainedManifoldObjective, p; kwargs...) - -perform the exact penalty method (EPM) performed in place of `p`. - -For all options, see [`exact_penalty_method`](@ref). -""" +@doc "$(_doc_EPM)" exact_penalty_method!(M::AbstractManifold, args...; kwargs...) function exact_penalty_method!( M::AbstractManifold, diff --git a/src/solvers/gradient_descent.jl b/src/solvers/gradient_descent.jl index cc5ef1ca25..fc74d68719 100644 --- a/src/solvers/gradient_descent.jl +++ b/src/solvers/gradient_descent.jl @@ -1,26 +1,37 @@ -@doc raw""" +@doc """ GradientDescentState{P,T} <: AbstractGradientSolverState -Describes a Gradient based descent algorithm, with +Describes the state of a gradient based descent algorithm. # Fields -a default value is given in brackets if a parameter can be left out in initialization. -* `p`: (`rand(M)` the current iterate -* `X`: (`zero_vector(M,p)`) the current gradient ``\operatorname{grad}f(p)``, initialised to zero vector. -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(100)`) a [`StoppingCriterion`](@ref) -* `stepsize`: ([`default_stepsize`](@ref)`(M, GradientDescentState)`) a [`Stepsize`](@ref) -* `direction`: ([`IdentityUpdateRule`](@ref)) a processor to compute the gradient -* `retraction_method`: (`default_retraction_method(M, typeof(p))`) the retraction to use, defaults to - the default set for your manifold. 
+* $_field_iterate +* $_field_gradient +* $_field_stop +* $_field_step +* `direction::`[`DirectionUpdateRule`](@ref) : a processor to handle the obtained gradient and compute a + direction to “walk into”. +* $_field_retr # Constructor - GradientDescentState(M, p=rand(M); X=zero_vector(M, p), kwargs...) + GradientDescentState(M, p=rand(M); kwargs...) -Generate gradient descent options, where `X` can be used to set the tangent vector to store -the gradient in a certain type. All other fields are keyword arguments. +Initialize the gradient descent solver state, where + +## Input + +$_arg_M +$_arg_p + +## Keyword arguments + +* `direction=`[`IdentityUpdateRule`](@ref)`()` +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(100)` $_kw_stop_note +* `stepsize=`[`default_stepsize`](@ref)`(M, GradientDescentState; retraction_method=retraction_method)` +* $_kw_retraction_method_default +* $_kw_X_default # See also @@ -76,8 +87,8 @@ function GradientDescentState( M, p, X, stopping_criterion, stepsize, retraction_method, direction ) end -function (r::IdentityUpdateRule)(mp::AbstractManoptProblem, s::GradientDescentState, i) - return get_stepsize(mp, s, i), get_gradient!(mp, s.X, s.p) +function (r::IdentityUpdateRule)(mp::AbstractManoptProblem, s::GradientDescentState, k) + return get_stepsize(mp, s, k), get_gradient!(mp, s.X, s.p) end function default_stepsize( M::AbstractManifold, @@ -111,52 +122,67 @@ function show(io::IO, gds::GradientDescentState) return print(io, s) end -@doc raw""" - gradient_descent(M, f, grad_f, p=rand(M); kwargs...) - gradient_descent(M, gradient_objective, p=rand(M); kwargs...) - -perform a gradient descent - +_doc_gd_iterate = raw""" ```math p_{k+1} = \operatorname{retr}_{p_k}\bigl( s_k\operatorname{grad}f(p_k) \bigr), \qquad k=0,1,… ``` +where ``s_k > 0`` denotes a step size. +""" +_doc_gradient_descent = """ + gradient_descent(M, f, grad_f, p=rand(M); kwargs...) + gradient_descent(M, gradient_objective, p=rand(M); kwargs...) 
+ gradient_descent!(M, f, grad_f, p; kwargs...) + gradient_descent!(M, gradient_objective, p; kwargs...) + +perform the gradient descent algorithm -with different choices of the stepsize ``s_k`` available (see `stepsize` option below). +$(_doc_gd_iterate) +The algorithm can be performed in-place of `p`. # Input -* `M` a manifold ``\mathcal M`` -* `f` a cost function ``f: \mathcal M→ℝ`` to find a minimizer ``p^*`` for -* `grad_f` the gradient ``\operatorname{grad}f: \mathcal M → T\mathcal M`` of f - as a function `(M, p) -> X` or a function `(M, X, p) -> X` -* `p` an initial value `p` ``= p_0 ∈ \mathcal M`` +$_arg_M +$_arg_f +$_arg_grad_f +$_arg_p + +$_arg_alt_mgo + +# Keyword arguments + +* `direction=`[`IdentityUpdateRule`](@ref)`()`: + specify to perform a certain processing of the direction, for example + [`Nesterov`](@ref), [`MomentumGradient`](@ref) or [`AverageGradient`](@ref). + +* $_kw_evaluation_default: + $_kw_evaluation $_kw_evaluation_example -Alternatively to `f` and `grad_f` you can provide -the [`AbstractManifoldGradientObjective`](@ref) `gradient_objective` directly. +* $_kw_retraction_method_default: + $_kw_retraction_method -# Optional -* `direction`: ([`IdentityUpdateRule`](@ref)) perform a processing of the direction, e.g. -* `evaluation`: ([`AllocatingEvaluation`](@ref)) specify whether the gradient works by allocation (default) form `grad_f(M, p)` - or [`InplaceEvaluation`](@ref) in place of the form `grad_f!(M, X, p)`. -* `retraction_method`: ([`default_retraction_method`](@extref `ManifoldsBase.default_retraction_method-Tuple{AbstractManifold}`)`(M, typeof(p))`) a retraction to use -* `stepsize`: ([`default_stepsize`](@ref)`(M, GradientDescentState)`) a [`Stepsize`](@ref) -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(200) | `[`StopWhenGradientNormLess`](@ref)`(1e-8)`) - a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop. 
-* `X`: ([`zero_vector(M,p)`]) provide memory and/or type of the gradient to use` +* `stepsize=`[`default_stepsize`](@ref)`(M, GradientDescentState)`: + $_kw_stepsize -If you provide the [`ManifoldGradientObjective`](@ref) directly, `evaluation` is ignored. +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(200)`$_sc_any[`StopWhenGradientNormLess`](@ref)`(1e-8)`: + $_kw_stopping_criterion -All other keyword arguments are passed to [`decorate_state!`](@ref) for state decorators or -[`decorate_objective!`](@ref) for objective, respectively. -If you provide the [`ManifoldGradientObjective`](@ref) directly, these decorations can still be specified +* $_kw_X_default: + $_kw_X, the evaluated gradient ``$_l_grad f`` evaluated at ``p^{(k)}``. -# Output +$_kw_others -the obtained (approximate) minimizer ``p^*``. -To obtain the whole final state of the solver, see [`get_solver_return`](@ref) for details +If you provide the [`ManifoldGradientObjective`](@ref) directly, the `evaluation=` keyword is ignored. +The decorations are still applied to the objective. + +$_doc_remark_tutorial_debug + +$_doc_sec_output """ + +@doc "$(_doc_gradient_descent)" gradient_descent(M::AbstractManifold, args...; kwargs...) + function gradient_descent(M::AbstractManifold, f, grad_f; kwargs...) return gradient_descent(M, f, grad_f, rand(M); kwargs...) end @@ -194,30 +220,7 @@ function gradient_descent( return gradient_descent!(M, mgo, q; kwargs...) end -@doc raw""" - gradient_descent!(M, f, grad_f, p; kwargs...) - gradient_descent!(M, gradient_objective, p; kwargs...) - -perform a Gradient descent in-place of `p` - -```math -p_{k+1} = \operatorname{retr}_{p_k}\bigl( s_k\operatorname{grad}f(p_k) \bigr) -``` - -in place of `p` with different choices of ``s_k`` available. 
- -# Input - -* `M` a manifold ``\mathcal M`` -* `f` a cost function ``F:\mathcal M→ℝ`` to minimize -* `grad_f` the gradient ``\operatorname{grad}F:\mathcal M→ T\mathcal M`` of F -* `p` an initial value ``p ∈ \mathcal M`` - -Alternatively to `f` and `grad_f` you can provide -the [`AbstractManifoldGradientObjective`](@ref) `gradient_objective` directly. - -For more options, especially [`Stepsize`](@ref)s for ``s_k``, see [`gradient_descent`](@ref) -""" +"$(_doc_gradient_descent)" gradient_descent!(M::AbstractManifold, args...; kwargs...) function gradient_descent!( M::AbstractManifold, @@ -275,8 +278,8 @@ function initialize_solver!(mp::AbstractManoptProblem, s::GradientDescentState) get_gradient!(mp, s.X, s.p) return s end -function step_solver!(p::AbstractManoptProblem, s::GradientDescentState, i) - step, s.X = s.direction(p, s, i) +function step_solver!(p::AbstractManoptProblem, s::GradientDescentState, k) + step, s.X = s.direction(p, s, k) retract!(get_manifold(p), s.p, s.p, s.X, -step, s.retraction_method) return s end diff --git a/src/solvers/interior_point_Newton.jl b/src/solvers/interior_point_Newton.jl index defa743cf7..ac4df5f8c9 100644 --- a/src/solvers/interior_point_Newton.jl +++ b/src/solvers/interior_point_Newton.jl @@ -1,29 +1,25 @@ -_doc_IPN = raw""" +_doc_IPN_subsystem = + raw""" +```math +\operatorname{J} F(p, μ, λ, s)[X, Y, Z, W] = -F(p, μ, λ, s), +\text{ where } +""" * "X ∈ $(_l_TpM()), Y,W ∈ ℝ^m, Z ∈ ℝ^n,\n```\n" +_doc_IPN = """ interior_point_Newton(M, f, grad_f, Hess_f, p=rand(M); kwargs...) interior_point_Newton(M, cmo::ConstrainedManifoldObjective, p=rand(M); kwargs...) - interior_point_Newton!(M, f, grad_f, Hess_f, p; kwargs...) + interior_point_Newton!(M, f, grad_f, Hess_f, p; kwargs...) interior_point_Newton(M, ConstrainedManifoldObjective, p; kwargs...) perform the interior point Newton method following [LaiYoshise:2024](@cite).
In order to solve the constrained problem -```math -\begin{aligned} -\min_{p ∈\mathcal{M}} &f(p)\\ -\text{subject to } &g_i(p)\leq 0 \quad \text{ for } i= 1, …, m,\\ -\quad &h_j(p)=0 \quad \text{ for } j=1,…,n, -\end{aligned} -``` +$_problem_constrained This algorithms iteratively solves the linear system based on extending the KKT system by a slack variable `s`. -```math -\operatorname{J} F(p, μ, λ, s)[X, Y, Z, W] = -F(p, μ, λ, s), -\text{ where } -X ∈ T_p\mathcal M, Y,W ∈ ℝ^m, Z ∈ ℝ^n, -``` +$(_doc_IPN_subsystem) see [`CondensedKKTVectorFieldJacobian`](@ref) and [`CondensedKKTVectorField`](@ref), respectively, for the reduced form, this is usually solved in. @@ -41,11 +37,11 @@ the constraints are further fulfilled. # Input -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function ``f : \mathcal M → ℝ`` to minimize -* `grad_f`: the gradient ``\operatorname{grad} f : \mathcal M → T \mathcal M`` of ``f`` -* `Hess_f`: the Hessian ``\operatorname{Hess}f(p): T_p\mathcal M → T_p\mathcal M``, ``X ↦ \operatorname{Hess}f(p)[X] = ∇_X\operatorname{grad}f(p)`` -* `p=`[`rand`](@extref Base.rand-Tuple{AbstractManifold})`(M)`: an initial value ``p ∈ \mathcal M`` +* `M`: a manifold ``$(_l_M)`` +* `f`: a cost function ``f : $(_l_M) → ℝ`` to minimize +* `grad_f`: the gradient ``$(_l_grad) f : $(_l_M) → T $(_l_M)`` of ``f`` +* `Hess_f`: the Hessian ``$(_l_Hess)f(p): T_p$(_l_M) → T_p$(_l_M)``, ``X ↦ $(_l_Hess)f(p)[X] = ∇_X$(_l_grad)f(p)`` +* `p=$(_link_rand()): an initial value ``p ∈ $(_l_M)`` or a [`ConstrainedManifoldObjective`](@ref) `cmo` containing `f`, `grad_f`, `Hess_f`, and the constraints @@ -78,7 +74,7 @@ pass a [`ConstrainedManifoldObjective`](@ref) `cmo` * `s=copy(μ)`: initial value for the slack variables * `σ=`[`calculate_σ`](@ref)`(M, cmo, p, μ, λ, s)`: scaling factor for the barrier parameter `β` in the sub problem, which is updated during the iterations * `step_objective`: a [`ManifoldGradientObjective`](@ref) of the norm of the KKT vector field 
[`KKTVectorFieldNormSq`](@ref) and its gradient [`KKTVectorFieldNormSqGradient`](@ref) -* `step_problem`: the manifold ``\mathcal M × ℝ^m × ℝ^n × ℝ^m`` together with the `step_objective` +* `step_problem`: the manifold ``$(_l_M) × ℝ^m × ℝ^n × ℝ^m`` together with the `step_objective` as the problem the linesearch `stepsize=` employs for determining a step size * `step_state`: the [`StepsizeState`](@ref) with point and search direction * `stepsize` an [`ArmijoLinesearch`](@ref) with the [`InteriorPointCentralityCondition`](@ref) as @@ -87,11 +83,11 @@ pass a [`ConstrainedManifoldObjective`](@ref) `cmo` a stopping criterion, by default depending on the residual of the KKT vector field or a maximal number of steps, which ever hits first. * `sub_kwargs=(;)`: keyword arguments to decorate the sub options, for example debug, that automatically respects the main solvers debug options (like sub-sampling) as well * `sub_objective`: The [`SymmetricLinearSystemObjective`](@ref) modelling the system of equations to use in the sub solver, - includes the [`CondensedKKTVectorFieldJacobian`](@ref) ``\mathcal A(X)`` and the [`CondensedKKTVectorField`](@ref) ``b`` in ``\mathcal A(X) + b = 0`` we aim to solve. - This is used to setup the `sub_problem`. If you set the `sub_problem` directly, this keyword has no effect. -* `sub_stopping_criterion=`[`StopAfterIteration`](@ref)`(manifold_dimension(M))`[` | `](@ref StopWhenAny)[`StopWhenRelativeResidualLess`](@ref)`(c,1e-8)`, where ``c = \lVert b \rVert`` from the system to solve. - This keyword is used in the `sub_state`. If you set that keyword diretly, this keyword does not have an effect. -* `sub_problem`: combining the `sub_objective` and the tangent space at ``(p,λ)``` on the manifold ``\mathcal M × ℝ^n`` to a manopt problem. + includes the [`CondensedKKTVectorFieldJacobian`](@ref) ``$(_l_cal("A"))(X)`` and the [`CondensedKKTVectorField`](@ref) ``b`` in ``$(_l_cal("A"))(X) + b = 0`` we aim to solve. 
+ $(_kw_used_in("sub_problem")) +* `sub_stopping_criterion=`[`StopAfterIteration`](@ref)`(manifold_dimension(M))`[` | `](@ref StopWhenAny)[`StopWhenRelativeResidualLess`](@ref)`(c,1e-8)`, where ``c = $(_l_norm("b"))`` from the system to solve. + $(_kw_used_in("sub_state")) +* `sub_problem`: combining the `sub_objective` and the tangent space at ``(p,λ)`` on the manifold ``$(_l_M) × ℝ^n`` to a manopt problem. This is the manifold and objective for the sub solver. * `sub_state=`[`ConjugateResidualState`](@ref): a state specifying the subsolver. This default is also decorated with the `sub_kwargs...`. * `vector_space=`[`Rn`](@ref Manopt.Rn) a function that, given an integer, returns the manifold to be used for the vector space components ``ℝ^m,ℝ^n`` @@ -263,8 +259,7 @@ function interior_point_Newton!( ), sub_stopping_criterion::StoppingCriterion=StopAfterIteration(manifold_dimension(M)) | StopWhenRelativeResidualLess( - norm(_sub_M, _sub_p, get_b(TangentSpace(_sub_M, _sub_p), sub_objective, _sub_X)), - 1e-8, + norm(_sub_M, _sub_p, get_b(TangentSpace(_sub_M, _sub_p), sub_objective)), 1e-8 ), sub_state::St=decorate_state!( ConjugateResidualState( @@ -314,7 +309,7 @@ function initialize_solver!(::AbstractManoptProblem, ips::InteriorPointNewtonSta return ips end -function step_solver!(amp::AbstractManoptProblem, ips::InteriorPointNewtonState, i) +function step_solver!(amp::AbstractManoptProblem, ips::InteriorPointNewtonState, k) M = get_manifold(amp) cmo = get_objective(amp) N = base_manifold(get_manifold(ips.sub_problem)) @@ -367,7 +362,7 @@ function step_solver!(amp::AbstractManoptProblem, ips::InteriorPointNewtonState, end set_manopt_parameter!(ips.stepsize, :DecreaseCondition, :τ, N, q) # determine stepsize - α = ips.stepsize(ips.step_problem, ips.step_state, i) + α = ips.stepsize(ips.step_problem, ips.step_state, k) # Update Parameters and slack retract!(M, ips.p, ips.p, α * ips.X, ips.retraction_method) if m > 0 diff --git a/src/solvers/particle_swarm.jl
b/src/solvers/particle_swarm.jl index 90d2841edd..63648e2a7e 100644 --- a/src/solvers/particle_swarm.jl +++ b/src/solvers/particle_swarm.jl @@ -1,22 +1,21 @@ # # State # -@doc raw""" +@doc """ ParticleSwarmState{P,T} <: AbstractManoptSolverState Describes a particle swarm optimizing algorithm, with # Fields -* `cognitive_weight`: (`1.4`) a cognitive weight factor -* `inertia`: (`0.65`) the inertia of the particles -* `inverse_retraction_method`: (`default_inverse_retraction_method(M, eltype(swarm))`) an inverse retraction to use. -* `retraction_method`: (`default_retraction_method(M, eltype(swarm))`) the retraction to use -* `social_weight`: (`1.4`) a social weight factor -* `stopping_criterion`: (`[`StopAfterIteration`](@ref)`(500) | `[`StopWhenChangeLess`](@ref)`(1e-4)`) - a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop. -* `vector_transport_method`: (`default_vector_transport_method(M, eltype(swarm))`) a vector transport to use -* `velocity`: a set of tangent vectors (of type `AbstractVector{T}`) representing the velocities of the particles +* `cognitive_weight`: a cognitive weight factor +* `inertia`: the inertia of the particles +* $(_field_inv_retr) +* $(_field_retr) +* `social_weight`: a social weight factor +* $(_field_stop) +* $(_field_vector_transp) +* `velocity`: a set of tangent vectors (of type `AbstractVector{T}`) representing the velocities of the particles # Internal and temporary fields @@ -25,14 +24,24 @@ Describes a particle swarm optimizing algorithm, with * `positional_best`: storing the best position ``p_i`` every single swarm participant visited * `q`: temporary storage for a point to avoid allocations during a step of the algorithm * `social_vec`: temporary storage for a tangent vector related to `social_weight` -* `swarm`: a set of points (of type `AbstractVector{P}`) on a manifold ``\{s_i\}_{i=1}^N`` +* `swarm`: a set of points (of type `AbstractVector{P}`) on a manifold ``$(_math_sequence("a","i","1","N"))`` 
# Constructor ParticleSwarmState(M, initial_swarm, velocity; kawrgs...) -construct a particle swarm solver state for the manifold `M` starting at initial population `x0` with `velocities`, -where the manifold is used within the defaults specified previously. All fields with defaults are keyword arguments here. +construct a particle swarm solver state for the manifold `M` starting with the initial population `initial_swarm` with `velocities`. +The `p` used in the following defaults is the type of one point from the swarm. + +# Keyword arguments + +* `cognitive_weight=1.4` +* `inertia=0.65` +* `inverse_retraction_method=default_inverse_retraction_method(M, eltype(swarm))`: an inverse retraction to use. +* $(_kw_retraction_method_default) +* `social_weight=1.4` +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(500)`$(_sc_any)[`StopWhenChangeLess`](@ref)`(1e-4)` +* $(_kw_vector_transport_method_default) # See also @@ -144,89 +153,102 @@ end # # Constructors # -@doc raw""" +_doc_swarm = raw"``S = \{s_1, \ldots, s_n\}``" +_doc_velocities = raw""" +```math + X_k^{(i)} = ω \, \operatorname{T}_{s_k^{(i)}\gets s_k^{(i-1)}}X_k^{(i-1)} + c r_1 \operatorname{retr}_{s_k^{(i)}}^{-1}(p_k^{(i)}) + s r_2 \operatorname{retr}_{s_k^{(i)}}^{-1}(p), +``` +""" +_doc_particle_update = raw""" +```math +s_k^{(i+1)} = \operatorname{retr}_{s_k^{(i)}}(X_k^{(i)}), +``` +""" +_doc_swarm_best = raw""" +```math +p_k^{(i+1)} = \begin{cases} +s_k^{(i+1)}, & \text{if } F(s_k^{(i+1)}) 0 && norm(c.velocity_norms) < c.threshold - c.at_iteration = i + if k > 0 && norm(c.velocity_norms) < c.threshold + c.at_iteration = k return true end return false diff --git a/src/solvers/primal_dual_semismooth_Newton.jl b/src/solvers/primal_dual_semismooth_Newton.jl index 7a18a7d51c..39aac03a3f 100644 --- a/src/solvers/primal_dual_semismooth_Newton.jl +++ b/src/solvers/primal_dual_semismooth_Newton.jl @@ -1,43 +1,51 @@ -@doc raw""" - primal_dual_semismooth_Newton(M, N, cost, p, X, m, n, prox_F, diff_prox_F, 
prox_G_dual, diff_prox_dual_G, linearized_operator, adjoint_linearized_operator) - -Perform the Primal-Dual Riemannian semismooth Newton algorithm. - +_doc_PDSN_formula = raw""" Given a `cost` function ``\mathcal E: \mathcal M → \overline{ℝ}`` of the form ```math \mathcal E(p) = F(p) + G( Λ(p) ), ``` where ``F: \mathcal M → \overline{ℝ}``, ``G: \mathcal N → \overline{ℝ}``, and ``\Lambda: \mathcal M → \mathcal N``. The remaining input parameters are +""" + +_doc_PDSN = """ + primal_dual_semismooth_Newton(M, N, cost, p, X, m, n, prox_F, diff_prox_F, prox_G_dual, diff_prox_dual_G, linearized_operator, adjoint_linearized_operator) + +Perform the Primal-Dual Riemannian semismooth Newton algorithm. -* `p, X`: primal and dual start points ``p∈\mathcal M`` and ``X ∈ T_n\mathcal N`` -* `m,n`: base points on ``\mathcal M`` and ``\mathcal N``, respectively. +$(_doc_PDSN_formula) + +* `p, X`: primal and dual start points ``p∈$(_l_M)`` and ``X ∈ T_n$(_l_Manifold("N"))`` +* `m,n`: base points on ``$(_l_M)`` and ``$(_l_Manifold("N"))``, respectively. * `linearized_forward_operator`: the linearization ``DΛ(⋅)[⋅]`` of the operator ``Λ(⋅)``. -* `adjoint_linearized_operator`: the adjoint ``DΛ^*`` of the linearized operator ``DΛ(m): T_{m}\mathcal M → T_{Λ(m)}\mathcal N`` -* `prox_F, prox_G_Dual`: the proximal maps of ``F`` and ``G^\ast_n`` -* `diff_prox_F, diff_prox_dual_G`: the (Clarke Generalized) differentials of the proximal maps of ``F`` and ``G^\ast_n`` +* `adjoint_linearized_operator`: the adjoint ``DΛ^*`` of the linearized operator ``DΛ(m): $(_l_TpM("m")) → T_{Λ(m)}$(_l_Manifold("N"))`` +* `prox_F, prox_G_Dual`: the proximal maps of ``F`` and ``G^\\ast_n`` +* `diff_prox_F, diff_prox_dual_G`: the (Clarke Generalized) differentials of the proximal maps of ``F`` and ``G^\\ast_n`` For more details on the algorithm, see [DiepeveenLellmann:2021](@cite).
-# Optional parameters +# Keyword arguments -* `primal_stepsize`: (`1/sqrt(8)`) proximal parameter of the primal prox -* `Λ`: (`missing`) the exact operator, that is required if `Λ(m)=n` does not hold; +* `dual_stepsize=1/sqrt(8)`: proximal parameter of the dual prox +* $(_kw_evaluation_default): $(_kw_evaluation) +* $(_kw_inverse_retraction_method_default): $(_kw_inverse_retraction_method) +* `Λ=missing`: the exact operator, that is required if `Λ(m)=n` does not hold; `missing` indicates, that the forward operator is exact. -* `dual_stepsize`: (`1/sqrt(8)`) proximal parameter of the dual prox -* `reg_param`: (`1e-5`) regularisation parameter for the Newton matrix +* `primal_stepsize=1/sqrt(8)`: proximal parameter of the primal prox +* `reg_param=1e-5`: regularisation parameter for the Newton matrix Note that this changes the arguments the `forward_operator` is called. -* `stopping_criterion`: (`stopAtIteration(50)`) a [`StoppingCriterion`](@ref) -* `update_primal_base`: (`missing`) function to update `m` (identity by default/missing) -* `update_dual_base`: (`missing`) function to update `n` (identity by default/missing) -* `retraction_method`: (`default_retraction_method(M, typeof(p))`) the retraction to use -* `inverse_retraction_method`: (`default_inverse_retraction_method(M, typeof(p))`) an inverse retraction to use. 
-* `vector_transport_method`: (`default_vector_transport_method(M, typeof(p))`) a vector transport to use +* $(_kw_retraction_method_default): $(_kw_retraction_method) +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(50)`: + $(_kw_stopping_criterion) +* `update_primal_base=missing`: function to update `m` (identity by default/missing) +* `update_dual_base=missing`: function to update `n` (identity by default/missing) +* $(_kw_vector_transport_method_default): $(_kw_vector_transport_method) -# Output +$(_kw_others) -the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details +$(_doc_sec_output) """ + +@doc "$(_doc_PDSN)" function primal_dual_semismooth_Newton( M::AbstractManifold, N::AbstractManifold, @@ -77,12 +85,8 @@ function primal_dual_semismooth_Newton( kwargs..., ) end -@doc raw""" - primal_dual_semismooth_Newton(M, N, cost, x0, ξ0, m, n, prox_F, diff_prox_F, prox_G_dual, diff_prox_G_dual, linearized_forward_operator, adjoint_linearized_operator) -Perform the Riemannian Primal-dual Riemannian semismooth Newton algorithm in place of `x`, `ξ`, and potentially `m`, -`n` if they are not fixed. See [`primal_dual_semismooth_Newton`](@ref) for details and optional parameters. -""" +@doc "$(_doc_PDSN)" function primal_dual_semismooth_Newton!( M::mT, N::nT, diff --git a/src/solvers/proximal_bundle_method.jl b/src/solvers/proximal_bundle_method.jl index 26afda17f9..f71cb28bce 100644 --- a/src/solvers/proximal_bundle_method.jl +++ b/src/solvers/proximal_bundle_method.jl @@ -1,43 +1,57 @@ -@doc raw""" +@doc """ ProximalBundleMethodState <: AbstractManoptSolverState stores option values for a [`proximal_bundle_method`](@ref) solver.
# Fields +* `α`: curvature-dependent parameter used to update `η` +* `α₀`: initialization value for `α`, used to update `η` * `approx_errors`: approximation of the linearization errors at the last serious step * `bundle`: bundle that collects each iterate with the computed subgradient at the iterate -* `bundle_size`: (`50`) the maximal size of the bundle -* `c`: convex combination of the approximation errors -* `d`: descent direction -* `inverse_retraction_method`: the inverse retraction to use within -* `m`: (`0.0125`) the parameter to test the decrease of the cost -* `p`: current candidate point -* `p_last_serious`: last serious iterate -* `retraction_method`: the retraction to use within -* `stop`: a [`StoppingCriterion`](@ref) -* `transported_subgradients`: subgradients of the bundle that are transported to `p_last_serious` -* `vector_transport_method`: the vector transport method to use within -* `X`: (`zero_vector(M, p)`) the current element from the possible subgradients - at `p` that was last evaluated. 
-* `α₀`: (`1.2`) initialization value for `α`, used to update `η` -* `α`: curvature-dependent parameter used to update `η` -* `ε`: (`1e-2`) stepsize-like parameter related to the injectivity radius of the manifold -* `δ`: parameter for updating `μ`: if ``δ < 0`` then ``μ = \log(i + 1)``, else ``μ += δ μ`` -* `η`: curvature-dependent term for updating the approximation errors -* `λ`: convex coefficients that solve the subproblem -* `μ`: (`0.5`) (initial) proximal parameter for the subproblem -* `ν`: the stopping parameter given by ``ν = - μ |d|^2 - c`` -* `sub_problem`: a function evaluating with new allocations that solves the sub problem on `M` given the last serious iterate `p_last_serious`, the linearization errors `linearization_errors`, and the transported subgradients `transported_subgradients`, -* `sub_state`: an [`AbstractManoptSolverState`](@ref) for the subsolver +* `bundle_size`: the maximal size of the bundle +* `c`: convex combination of the approximation errors +* `d`: descent direction +* `δ`: parameter for updating `μ`: if ``δ < 0`` then ``μ = \\log(i + 1)``, else ``μ += δ μ`` +* `ε`: stepsize-like parameter related to the injectivity radius of the manifold +* `η`: curvature-dependent term for updating the approximation errors +* $(_field_inv_retr) +* `λ`: convex coefficients that solve the subproblem +* `m`: the parameter to test the decrease of the cost +* `μ`: (initial) proximal parameter for the subproblem +* `ν`: the stopping parameter given by ``ν = - μ |d|^2 - c`` +* `p`: current candidate point +* `p_last_serious`: last serious iterate +* $(_field_retr) +* $(_field_stop) +* `transported_subgradients`: subgradients of the bundle that are transported to `p_last_serious` +* $(_field_vector_transp) +* $(_field_subgradient) +* $(_field_sub_problem) +* $(_field_sub_state) # Constructor - ProximalBundleMethodState(M::AbstractManifold, p; kwargs...) 
- -with keywords for all fields from before besides `p_last_serious` which obtains the same type as `p`. -You can use for example `X=` to specify the type of tangent vector to use - + ProximalBundleMethodState(M::AbstractManifold, p=rand(M); kwargs...) + +Generate the state for the [`proximal_bundle_method`](@ref) on the manifold `M` +with initial point `p`. + +# Keyword arguments + +* `α₀=1.2` +* `bundle_size=50` +* `δ=1.0` +* `ε=1e-2` +* `μ=0.5` +* `m=0.0125` +* $(_kw_inverse_retraction_method_default): $(_kw_inverse_retraction_method) +* $(_kw_retraction_method_default): $(_kw_retraction_method) +* `stopping_criterion=`[`StopWhenLagrangeMultiplierLess`](@ref)`(1e-8)`$(_sc_any)[`StopAfterIteration`](@ref)`(5000)` +* `sub_problem=`[`proximal_bundle_method_subsolver`](@ref) +* `sub_state=`[`AllocatingEvaluation`](@ref) +* $(_kw_vector_transport_method_default): $(_kw_vector_transport_method) +* `X=`$(_link_zero_vector()) specify the type of tangent vector to use. """ mutable struct ProximalBundleMethodState{ P, @@ -180,17 +194,24 @@ function show(io::IO, pbms::ProximalBundleMethodState) return print(io, s) end -@doc raw""" - proximal_bundle_method(M, f, ∂f, p) - -perform a proximal bundle method ``p_{j+1} = \mathrm{retr}(p_k, -d_k)``, where +_doc_PBM_dk = raw""" ```math -d_k = \frac{1}{\mu_l} \sum_{j\in J_k} λ_j^k \mathrm{P}_{p_k←q_j}X_{q_j}, +d_k = \frac{1}{\mu_k} \sum_{j\in J_k} λ_j^k \mathrm{P}_{p_k←q_j}X_{q_j}, ``` -where ``X_{q_j}\in∂f(q_j)``, ``\mathrm{retr}`` is a retraction, -``p_k`` is the last serious iterate, ``\mu_l`` is a proximal parameter, and the -``λ_j^k`` are solutions to the quadratic subproblem provided by the -[`proximal_bundle_method_subsolver`](@ref). + +with ``X_{q_j} ∈ ∂f(q_j)``, ``p_k`` the last serious iterate, +``\mu_k`` a proximal parameter, and the +``λ_j^k`` as solutions to the quadratic subproblem provided by the +sub solver, see for example the [`proximal_bundle_method_subsolver`](@ref). 
+""" +_doc_PBM = """ + proximal_bundle_method(M, f, ∂f, p, kwargs...) + proximal_bundle_method!(M, f, ∂f, p, kwargs...) + +perform a proximal bundle method ``p^{(k+1)} = $(_l_retr)_{p^{(k)}}(-d_k)``, +where ``$(_l_retr)`` is a retraction and + +$(_doc_PBM_dk) Though the subdifferential might be set valued, the argument `∂f` should always return _one_ element from the subdifferential, but not necessarily deterministic. @@ -199,55 +220,42 @@ For more details see [HoseiniMonjeziNobakhtianPouryayevali:2021](@cite). # Input -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function ``F:\mathcal M → ℝ`` to minimize -* `∂f`: the (sub)gradient ``∂ f: \mathcal M → T\mathcal M`` of f - restricted to always only returning one value/element from the subdifferential. - This function can be passed as an allocation function `(M, p) -> X` or - a mutating function `(M, X, p) -> X`, see `evaluation`. -* `p`: an initial value ``p ∈ \mathcal M`` - -# Optional - -* `m`: a real number that controls the decrease of the cost function -* `evaluation`: ([`AllocatingEvaluation`](@ref)) specify whether the subgradient works by - allocation (default) form `∂f(M, q)` or [`InplaceEvaluation`](@ref) in place, - that is it is of the form `∂f!(M, X, p)`. -* `inverse_retraction_method`: (`default_inverse_retraction_method(M, typeof(p))`) an inverse retraction method to use -* `retraction`: (`default_retraction_method(M, typeof(p))`) a `retraction(M, p, X)` to use. -* `stopping_criterion`: ([`StopWhenLagrangeMultiplierLess`](@ref)`(1e-8)`) - a functor, see[`StoppingCriterion`](@ref), indicating when to stop. -* `vector_transport_method`: (`default_vector_transport_method(M, typeof(p))`) a vector transport method to use - -and the ones that are passed to [`decorate_state!`](@ref) for decorators. 
- -# Output - -the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details +* $(_arg_M) +* $(_arg_f) +* $(_arg_subgrad_f) +* $(_arg_p) + +# Keyword arguments + +* `α₀=1.2`: initialization value for `α`, used to update `η` +* `bundle_size=50`: the maximal size of the bundle +* `δ=1.0`: parameter for updating `μ`: if ``δ < 0`` then ``μ = \\log(i + 1)``, else ``μ += δ μ`` +* `ε=1e-2`: stepsize-like parameter related to the injectivity radius of the manifold +* $(_kw_evaluation_default): $(_kw_evaluation) +* $(_kw_inverse_retraction_method_default): $(_kw_inverse_retraction_method) +* `m=0.0125`: a real number that controls the decrease of the cost function +* `μ=0.5`: initial proximal parameter for the subproblem +* $(_kw_retraction_method_default): $(_kw_retraction_method) +* `stopping_criterion=`[`StopWhenLagrangeMultiplierLess`](@ref)`(1e-8)`$(_sc_any)[`StopAfterIteration`](@ref)`(5000)`: + $(_kw_stopping_criterion) +* `sub_problem=`[`proximal_bundle_method_subsolver`](@ref) +* `sub_state=`[`AllocatingEvaluation`](@ref) +* $(_kw_vector_transport_method_default): $(_kw_vector_transport_method) + +$(_kw_others) + +$(_doc_sec_output) """ + +@doc "$(_doc_PBM)" function proximal_bundle_method( M::AbstractManifold, f::TF, ∂f::TdF, p; kwargs... ) where {TF,TdF} p_star = copy(M, p) return proximal_bundle_method!(M, f, ∂f, p_star; kwargs...) end -@doc raw""" - proximal_bundle_method!(M, f, ∂f, p) -perform a proximal bundle method ``p_{j+1} = \mathrm{retr}(p_k, -d_k)`` in place of `p` - -# Input - -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function ``f:\mathcal M→ℝ`` to minimize -* `∂f`: the (sub)gradient ``\partial f:\mathcal M→ T\mathcal M`` of F - restricted to always only returning one value/element from the subdifferential. - This function can be passed as an allocation function `(M, p) -> X` or - a mutating function `(M, X, p) -> X`, see `evaluation`. 
-* `p`: an initial value ``p_0=p ∈ \mathcal M`` - -for more details and all optional parameters, see [`proximal_bundle_method`](@ref). -""" +@doc "$(_doc_PBM)" function proximal_bundle_method!( M::AbstractManifold, f::TF, @@ -304,7 +312,7 @@ function initialize_solver!( push!(pbms.λ, zero(R)) return pbms end -function step_solver!(mp::AbstractManoptProblem, pbms::ProximalBundleMethodState, i) +function step_solver!(mp::AbstractManoptProblem, pbms::ProximalBundleMethodState, k) M = get_manifold(mp) pbms.transported_subgradients = [ if qj ≈ pbms.p_last_serious @@ -358,7 +366,7 @@ function step_solver!(mp::AbstractManoptProblem, pbms::ProximalBundleMethodState if get_cost(mp, pbms.p) ≤ (get_cost(mp, pbms.p_last_serious) + pbms.m * pbms.ν) copyto!(M, pbms.p_last_serious, pbms.p) if pbms.δ < zero(eltype(pbms.μ)) - pbms.μ = log(i + 1) + pbms.μ = log(k + 1) else pbms.μ += pbms.δ * pbms.μ end @@ -440,25 +448,25 @@ function _proximal_bundle_subsolver!( end function (sc::StopWhenLagrangeMultiplierLess)( - mp::AbstractManoptProblem, pbms::ProximalBundleMethodState, i::Int + mp::AbstractManoptProblem, pbms::ProximalBundleMethodState, k::Int ) - if i == 0 # reset on init + if k == 0 # reset on init sc.at_iteration = -1 end M = get_manifold(mp) - if (sc.mode == :estimate) && (-pbms.ν ≤ sc.tolerances[1]) && (i > 0) + if (sc.mode == :estimate) && (-pbms.ν ≤ sc.tolerances[1]) && (k > 0) sc.values[1] = -pbms.ν - sc.at_iteration = i + sc.at_iteration = k return true end nd = norm(M, pbms.p_last_serious, pbms.d) if (sc.mode == :both) && (pbms.c ≤ sc.tolerances[1]) && (nd ≤ sc.tolerances[2]) && - (i > 0) + (k > 0) sc.values[1] = pbms.c sc.values[2] = nd - sc.at_iteration = i + sc.at_iteration = k return true end return false @@ -480,14 +488,14 @@ to deactivate the warning, then this [`DebugAction`](@ref) is inactive. 
All other symbols are handled as if they were `:Always:` """ function (d::DebugWarnIfLagrangeMultiplierIncreases)( - ::AbstractManoptProblem, st::ProximalBundleMethodState, i::Int + ::AbstractManoptProblem, st::ProximalBundleMethodState, k::Int ) - (i < 1) && (return nothing) + (k < 1) && (return nothing) if d.status !== :No new_value = -st.ν if new_value ≥ d.old_value * d.tol @warn """The stopping parameter increased by at least $(d.tol). - At iteration #$i the stopping parameter -ν increased from $(d.old_value) to $(new_value).\n + At iteration #$k the stopping parameter -ν increased from $(d.old_value) to $(new_value).\n Consider changing either the initial proximal parameter `μ`, its update coefficient `δ`, or the stepsize-like parameter `ε` related to the invectivity radius of the manifold in the `proximal_bundle_method` call. @@ -498,7 +506,7 @@ function (d::DebugWarnIfLagrangeMultiplierIncreases)( end elseif new_value < zero(number_eltype(st.ν)) @warn """The stopping parameter is negative. - At iteration #$i the stopping parameter -ν became negative.\n + At iteration #$k the stopping parameter -ν became negative.\n Consider changing either the initial proximal parameter `μ`, its update coefficient `δ`, or the stepsize-like parameter `ε` related to the invectivity radius of the manifold in the `proximal_bundle_method` call. diff --git a/src/solvers/quasi_Newton.jl b/src/solvers/quasi_Newton.jl index 86e5b01c6d..bae0128676 100644 --- a/src/solvers/quasi_Newton.jl +++ b/src/solvers/quasi_Newton.jl @@ -1,42 +1,41 @@ -@doc raw""" +@doc """ QuasiNewtonState <: AbstractManoptSolverState -These Quasi Newton [`AbstractManoptSolverState`](@ref) represent any quasi-Newton based method and can be -used with any update rule for the direction. +The [`AbstractManoptSolverState`](@ref) represent any quasi-Newton based method and stores +all necessary fields. 
# Fields -* `p` the current iterate, a point on a manifold -* `X` the current gradient -* `sk` the current step -* `yk` the current gradient difference -* `direction_update` an [`AbstractQuasiNewtonDirectionUpdate`](@ref) rule. -* `nondescent_direction_behavior` a `Symbol` to specify how to handle direction that are not descent ones. -* `retraction_method` an `AbstractRetractionMethod` -* `stepsize` a [`Stepsize`](@ref) -* `stop` a [`StoppingCriterion`](@ref) - -as well as for internal use +* `direction_update`: an [`AbstractQuasiNewtonDirectionUpdate`](@ref) rule. +* `η`: the current update direction +* `nondescent_direction_behavior`: a `Symbol` to specify how to handle direction that are not descent ones. +* `nondescent_direction_value`: the value from the last inner product from checking for descent directions +* $(_field_p) +* `p_old`: the last iterate +* `sk`: the current step +* `yk`: the current gradient difference +* $(_field_retr) +* $(_field_step) +* $(_field_stop) +* $(_field_gradient) +* `X_old`: the last gradient -* `p_old` the last iterate -* `η` the current update direction -* `X_old` the last gradient -* `nondescent_direction_value` the value from the last inner product from checking for descent directions # Constructor - QuasiNewtonState( - M::AbstractManifold, - x; - initial_vector=zero_vector(M,x), - direction_update::D=QuasiNewtonLimitedMemoryDirectionUpdate(M, x, InverseBFGS(), 20; - vector_transport_method=vector_transport_method, - ) - stopping_criterion=StopAfterIteration(1000) | StopWhenGradientNormLess(1e-6), - retraction_method::RM=default_retraction_method(M, typeof(p)), - vector_transport_method::VTM=default_vector_transport_method(M, typeof(p)), - stepsize=default_stepsize(M; QuasiNewtonState) - ) + QuasiNewtonState(M::AbstractManifold, p; kwargs...) + +Generate the Quasi Newton state on the manifold `M` with start point `p`. 
+ +## Keyword arguments + +* `direction_update=`[`QuasiNewtonLimitedMemoryDirectionUpdate`](@ref)`(M, p, InverseBFGS(), 20; vector_transport_method=vector_transport_method)` +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(1000)`$(_sc_any)[`StopWhenGradientNormLess`](@ref)`(1e-6)` +* $(_kw_retraction_method_default): $(_kw_retraction_method) +* `stepsize=default_stepsize(M; QuasiNewtonState)`: $(_kw_stepsize) + The default here is the [`WolfePowellLinesearch`](@ref) using the keywords `retraction_method` and `vector_transport_method` +* $(_kw_vector_transport_method_default): $(_kw_vector_transport_method) +* $(_kw_X_default) # See also @@ -70,7 +69,8 @@ end function QuasiNewtonState( M::AbstractManifold, p::P; - initial_vector::T=zero_vector(M, p), + initial_vector::T=zero_vector(M, p), # deprecated + X::T=initial_vector, vector_transport_method::VTM=default_vector_transport_method(M, typeof(p)), direction_update::D=QuasiNewtonLimitedMemoryDirectionUpdate( M, p, InverseBFGS(), 20; vector_transport_method=vector_transport_method @@ -94,19 +94,18 @@ function QuasiNewtonState( RM<:AbstractRetractionMethod, VTM<:AbstractVectorTransportMethod, } - sk_init = zero_vector(M, p) - return QuasiNewtonState{P,typeof(sk_init),D,SC,typeof(stepsize),RM,VTM,Float64}( + return QuasiNewtonState{P,T,D,SC,S,RM,VTM,Float64}( p, copy(M, p), - copy(M, p, initial_vector), - initial_vector, - sk_init, - copy(M, sk_init), + copy(M, p, X), + X, + copy(M, p, X), + copy(M, p, X), direction_update, retraction_method, stepsize, stopping_criterion, - copy(M, p, initial_vector), + copy(M, p, X), vector_transport_method, nondescent_direction_behavior, 1.0, @@ -175,64 +174,76 @@ function default_stepsize( linesearch_stopsize=1e-10, ) end -@doc raw""" - quasi_Newton(M, f, grad_f, p) +_doc_QN_init_scaling = raw"``\frac{⟨s_k,y_k⟩_{p_k}}{\lVert y_k\rVert_{p_k}}``" +_doc_QN = """ + quasi_Newton(M, f, grad_f, p; kwargs...) + quasi_Newton!(M, f, grad_f, p; kwargs...)
-Perform a quasi Newton iteration for `f` on the manifold `M` starting -in the point `p`. +Perform a quasi Newton iteration to solve +$(_problem_default) + +with start point `p`. The iterations can be done in-place of `p```=p^{(0)}``. The ``k``th iteration consists of -1. Compute the search direction ``η_k = -\mathcal{B}_k [\operatorname{grad}f (p_k)]`` or solve ``\mathcal{H}_k [η_k] = -\operatorname{grad}f (p_k)]``. -2. Determine a suitable stepsize ``α_k`` along the curve ``\gamma(α) = R_{p_k}(α η_k)``, usually by using [`WolfePowellLinesearch`](@ref). -3. Compute `p_{k+1} = R_{p_k}(α_k η_k)``. -4. Define ``s_k = T_{p_k, α_k η_k}(α_k η_k)`` and ``y_k = \operatorname{grad}f(p_{k+1}) - T_{p_k, α_k η_k}(\operatorname{grad}f(p_k))``. -5. Compute the new approximate Hessian ``H_{k+1}`` or its inverse ``B_k``. +1. Compute the search direction ``η^{(k)} = -$(_l_cal("B"))_k [$(_l_grad)f (p^{(k)})]`` or solve ``$(_l_cal("H"))_k [η^{(k)}] = -$(_l_grad)f (p^{(k)})]``. +2. Determine a suitable stepsize ``α_k`` along the curve ``γ(α) = R_{p^{(k)}}(α η^{(k)})``, usually by using [`WolfePowellLinesearch`](@ref). +3. Compute ``p^{(k+1)} = R_{p^{(k)}}(α_k η^{(k)})``. +4. Define ``s_k = $(_l_cal("T"))_{p^{(k)}, α_k η^{(k)}}(α_k η^{(k)})`` and ``y_k = $(_l_grad)f(p^{(k+1)}) - $(_l_cal("T"))_{p^{(k)}, α_k η^{(k)}}($(_l_grad)f(p^{(k)}))``, where ``$(_l_cal("T"))`` denotes a vector transport. +5. Compute the new approximate Hessian ``H_{k+1}`` or its inverse ``B_{k+1}``. # Input -* `M` a manifold ``\mathcal{M}``. -* `f` a cost function ``F : \mathcal{M} →ℝ`` to minimize. -* `grad_f` the gradient ``\operatorname{grad}F : \mathcal{M} →T_x\mathcal M`` of ``F``. -* `p` an initial value ``p ∈ \mathcal{M}``. - -# Optional - -* `basis`: (`DefaultOrthonormalBasis()`) basis within the tangent spaces - to represent the Hessian (inverse). 
-* `cautious_update`: (`false`) whether or not to use - a [`QuasiNewtonCautiousDirectionUpdate`](@ref) -* `cautious_function`: (`(x) -> x*10^(-4)`) a monotone increasing function that is zero - at 0 and strictly increasing at 0 for the cautious update. -* `direction_update`: ([`InverseBFGS`](@ref)`()`) the update rule to use. -* `evaluation`: ([`AllocatingEvaluation`](@ref)) specify whether the gradient works by - allocation (default) form `gradF(M, p)` or [`InplaceEvaluation`](@ref) in place of form `gradF!(M, X, p)`. -* `initial_operator`: (`Matrix{Float64}(I, n, n)`) initial matrix to use die the - approximation, where `n=manifold_dimension(M)`, see also `scale_initial_operator`. -* `memory_size`: (`20`) limited memory, number of ``s_k, y_k`` to store. Set to a negative - value to use a full memory representation -* `retraction_method`: (`default_retraction_method(M, typeof(p))`) a retraction method to use -* `scale_initial_operator`: (`true`) scale initial operator with - ``\frac{⟨s_k,y_k⟩_{p_k}}{\lVert y_k\rVert_{p_k}}`` in the computation -* `stabilize`: (`true`) stabilize the method numerically by projecting computed (Newton-) - directions to the tangent space to reduce numerical errors -* `stepsize`: ([`WolfePowellLinesearch`](@ref)`(retraction_method, vector_transport_method)`) - specify a [`Stepsize`](@ref). -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(max(1000, memory_size)) | `[`StopWhenGradientNormLess`](@ref)`(1e-6)`) - specify a [`StoppingCriterion`](@ref) -* `vector_transport_method`: (`default_vector_transport_method(M, typeof(p))`) a vector transport to use. -* `nondescent_direction_behavior`: (`:reinitialize_direction_update`) specify how non-descent direction is handled. 
- This can be +$(_arg_M) +$(_arg_f) +$(_arg_grad_f) +$(_arg_p) + +# Keyword arguments + +* `basis=`[`DefaultOrthonormalBasis`](@extref ManifoldsBase.DefaultOrthonormalBasis)`()`: + basis to use within each of the the tangent spaces to represent + the Hessian (inverse) for the cases where it is stored in full (matrix) form. +* `cautious_update=false`: + whether or not to use the [`QuasiNewtonCautiousDirectionUpdate`](@ref) + which wraps the `direction_upate`. +* `cautious_function=(x) -> x * 1e-4`: + a monotone increasing function for the cautious update that is zero at ``x=0`` + and strictly increasing at ``0`` +* `direction_update=`[`InverseBFGS`](@ref)`()`: + the [`AbstractQuasiNewtonUpdateRule`](@ref) to use. +* $(_kw_evaluation_default): + $(_kw_evaluation) + $(_kw_evaluation_example) +* `initial_operator=Matrix{Float64}(I, n, n)`: + initial matrix to use in case the Hessian (inverse) approximation is stored as a full matrix, + that is `n=manifold_dimension(M)`. This matrix is only allocated for the full matrix case. + See also `scale_initial_operator`. +* `memory_size=20`: limited memory, number of ``s_k, y_k`` to store. + Set to a negative value to use a full memory (matrix) representation +* `nondescent_direction_behavior=:reinitialize_direction_update`: + specify how non-descent direction is handled. This can be * `:step_towards_negative_gradient`: the direction is replaced with negative gradient, a message is stored. * `:ignore`: the verification is not performed, so any computed direction is accepted. No message is stored. * `:reinitialize_direction_update`: discards operator state stored in direction update rules. * any other value performs the verification, keeps the direction but stores a message. A stored message can be displayed using [`DebugMessages`](@ref). 
+* $(_kw_retraction_method_default): $(_kw_retraction_method) +* `scale_initial_operator=true`: scale initial operator with $(_doc_QN_init_scaling) in the computation +* `stabilize=true`: stabilize the method numerically by projecting computed (Newton-) + directions to the tangent space to reduce numerical errors +* `stepsize=`[`WolfePowellLinesearch`](@ref)`(retraction_method, vector_transport_method)`: + $(_kw_stepsize) +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(max(1000, memory_size))`$(_sc_any)[`StopWhenGradientNormLess`](@ref)`(1e-6)`: + $(_kw_stopping_criterion) +* $(_kw_vector_transport_method_default): $(_kw_vector_transport_method) -# Output +$(_kw_others) -the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details. +$(_doc_sec_output) """ + +@doc "$(_doc_QN)" function quasi_Newton( M::AbstractManifold, f::TF, @@ -266,20 +277,8 @@ function quasi_Newton( q = copy(M, p) return quasi_Newton!(M, mgo, q; kwargs...) end -@doc raw""" - quasi_Newton!(M, F, gradF, x; options...) -Perform a quasi Newton iteration for `F` on the manifold `M` starting -in the point `x` using a retraction ``R`` and a vector transport ``T``. - -# Input -* `M` a manifold ``\mathcal{M}``. -* `F` a cost function ``F: \mathcal{M} →ℝ`` to minimize. -* `gradF` the gradient ``\operatorname{grad}F : \mathcal{M} → T_x\mathcal M`` of ``F`` implemented as `gradF(M,p)`. -* `x` an initial value ``x ∈ \mathcal{M}``. - -For all optional parameters, see [`quasi_Newton`](@ref). -""" +@doc "$(_doc_QN)" quasi_Newton!(M::AbstractManifold, params...; kwargs...) 
function quasi_Newton!( M::AbstractManifold, @@ -374,7 +373,7 @@ function initialize_solver!(amp::AbstractManoptProblem, qns::QuasiNewtonState) initialize_update!(qns.direction_update) return qns end -function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, iter) +function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, k) M = get_manifold(mp) get_gradient!(mp, qns.X, qns.p) qns.direction_update(qns.η, mp, qns) @@ -391,7 +390,7 @@ function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, iter) end end end - α = qns.stepsize(mp, qns, iter, qns.η) + α = qns.stepsize(mp, qns, k, qns.η) copyto!(M, qns.p_old, get_iterate(qns)) retract!(M, qns.p, qns.p, qns.η, α, qns.retraction_method) qns.η .*= α @@ -417,7 +416,7 @@ function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, iter) copyto!(M, qns.X_old, qns.p, qns.X) get_gradient!(mp, qns.X, qns.p) qns.yk .= qns.X ./ β .- qns.X_old - update_hessian!(qns.direction_update, mp, qns, qns.p_old, iter) + update_hessian!(qns.direction_update, mp, qns, qns.p_old, k) return qns end @@ -428,14 +427,14 @@ function locking_condition_scale( end @doc raw""" - update_hessian!(d, amp, st, p_old, iter) + update_hessian!(d::AbstractQuasiNewtonDirectionUpdate, amp, st, p_old, k) -update the Hessian within the [`QuasiNewtonState`](@ref) `o` given a [`AbstractManoptProblem`](@ref) `amp` +update the Hessian within the [`QuasiNewtonState`](@ref) `st` given a [`AbstractManoptProblem`](@ref) `amp` as well as the an [`AbstractQuasiNewtonDirectionUpdate`](@ref) `d` and the last iterate `p_old`. -Note that the current (`iter`th) iterate is already stored in `o.x`. +Note that the current (`k`th) iterate is already stored in [`get_iterate`](@ref)`(st)`. -See also [`AbstractQuasiNewtonUpdateRule`](@ref) for the different rules that are available -within `d`. +See also [`AbstractQuasiNewtonUpdateRule`](@ref) and its subtypes for the different rules +that are available within `d`. 
""" update_hessian!(d::AbstractQuasiNewtonDirectionUpdate, ::Any, ::Any, ::Any, ::Any) diff --git a/src/solvers/record_solver.jl b/src/solvers/record_solver.jl index 49a4ca674f..07195701d8 100644 --- a/src/solvers/record_solver.jl +++ b/src/solvers/record_solver.jl @@ -14,25 +14,25 @@ function initialize_solver!(amp::AbstractManoptProblem, rss::RecordSolverState) end """ - step_solver!(amp::AbstractManoptProblem, rss::RecordSolverState, i) + step_solver!(amp::AbstractManoptProblem, rss::RecordSolverState, k) Extend the `i`th step of the solver by a hook to run records, that were added to the `:Iteration` entry. """ -function step_solver!(amp::AbstractManoptProblem, rss::RecordSolverState, i) - step_solver!(amp, rss.state, i) - get(rss.recordDictionary, :Iteration, RecordGroup())(amp, get_state(rss), i) +function step_solver!(amp::AbstractManoptProblem, rss::RecordSolverState, k) + step_solver!(amp, rss.state, k) + get(rss.recordDictionary, :Iteration, RecordGroup())(amp, get_state(rss), k) return rss end """ - stop_solver!(amp::AbstractManoptProblem, rss::RecordSolverState, i) + stop_solver!(amp::AbstractManoptProblem, rss::RecordSolverStatek k) Extend the call to the stopping criterion by a hook to run records, that were added to the `:Stop` entry. """ -function stop_solver!(amp::AbstractManoptProblem, rss::RecordSolverState, i) - stop = stop_solver!(amp, rss.state, i) - stop && get(rss.recordDictionary, :Stop, RecordGroup())(amp, get_state(rss), i) +function stop_solver!(amp::AbstractManoptProblem, rss::RecordSolverState, k) + stop = stop_solver!(amp, rss.state, k) + stop && get(rss.recordDictionary, :Stop, RecordGroup())(amp, get_state(rss), k) return stop end diff --git a/src/solvers/solver.jl b/src/solvers/solver.jl index a6f882a558..2a72c74281 100644 --- a/src/solvers/solver.jl +++ b/src/solvers/solver.jl @@ -7,14 +7,14 @@ decorate the [`AbstractManoptSolverState`](@ref)` s` with specific decorators. 
optional arguments provide necessary details on the decorators. -* `debug`: (`Array{Union{Symbol,DebugAction,String,Int},1}()`) a set of symbols +* `debug=Array{Union{Symbol,DebugAction,String,Int},1}()`: a set of symbols representing [`DebugAction`](@ref)s, `Strings` used as dividers and a sub-sampling integer. These are passed as a [`DebugGroup`](@ref) within `:Iteration` to the [`DebugSolverState`](@ref) decorator dictionary. Only exception is `:Stop` that is passed to `:Stop`. -* `record`: (`Array{Union{Symbol,RecordAction,Int},1}()`) specify recordings +* `record=Array{Union{Symbol,RecordAction,Int},1}()`: specify recordings by using `Symbol`s or [`RecordAction`](@ref)s directly. An integer can again be used for only recording every ``i``th iteration. -* `return_state`: (`false`) indicate whether to wrap the options in a [`ReturnSolverState`](@ref), +* `return_state=false`: indicate whether to wrap the options in a [`ReturnSolverState`](@ref), indicating that the solver should return options and not (only) the minimizer. other keywords are ignored. @@ -64,13 +64,13 @@ decorate the [`AbstractManifoldObjective`](@ref)` o` with specific decorators. optional arguments provide necessary details on the decorators. A specific one is used to activate certain decorators. -* `cache`: (`missing`) specify a cache. Currently `:Simple` is supported and `:LRU` if you +* `cache=missing`: specify a cache. Currently `:Simple` is supported and `:LRU` if you load [`LRUCache.jl`](https://github.com/JuliaCollections/LRUCache.jl). For this case a tuple specifying what to cache and how many can be provided, has to be specified. For example `(:LRU, [:Cost, :Gradient], 10)` states that the last 10 used cost function evaluations and gradient evaluations should be stored. See [`objective_cache_factory`](@ref) for details. 
-* `count`: (`missing`) specify calls to the objective to be called, see [`ManifoldCountObjective`](@ref) for the full list -* `objective_type`: (`:Riemannian`) specify that an objective is `:Riemannian` or `:Euclidean`. +* `count=missing`: specify calls to the objective to be called, see [`ManifoldCountObjective`](@ref) for the full list +* `objective_type=:Riemannian`: specify that an objective is `:Riemannian` or `:Euclidean`. The `:Euclidean` symbol is equivalent to specifying it as `:Embedded`, since in the end, both refer to converting an objective from the embedding (whether its Euclidean or not) to the Riemannian one. @@ -140,27 +140,27 @@ function solve!(p::AbstractManoptProblem, s::AbstractManoptSolverState) end """ - step_solver!(amp::AbstractManoptProblem, ams::AbstractManoptSolverState, i) + step_solver!(amp::AbstractManoptProblem, ams::AbstractManoptSolverState, k) Do one iteration step (the `i`th) for an [`AbstractManoptProblem`](@ref)` p` by modifying the values in the [`AbstractManoptSolverState`](@ref) `ams`. """ -step_solver!(amp::AbstractManoptProblem, ams::AbstractManoptSolverState, i) -function step_solver!(p::AbstractManoptProblem, s::ReturnSolverState, i) - return step_solver!(p, s.state, i) +step_solver!(amp::AbstractManoptProblem, ams::AbstractManoptSolverState, k) +function step_solver!(p::AbstractManoptProblem, s::ReturnSolverState, k) + return step_solver!(p, s.state, k) end """ - stop_solver!(amp::AbstractManoptProblem, ams::AbstractManoptSolverState, i) + stop_solver!(amp::AbstractManoptProblem, ams::AbstractManoptSolverState, k) depending on the current [`AbstractManoptProblem`](@ref) `amp`, the current state of the solver stored in [`AbstractManoptSolverState`](@ref) `ams` and the current iterate `i` this function determines whether to stop the solver, which by default means to call the internal [`StoppingCriterion`](@ref). 
`ams.stop` """ -function stop_solver!(amp::AbstractManoptProblem, ams::AbstractManoptSolverState, i) - return ams.stop(amp, ams, i) +function stop_solver!(amp::AbstractManoptProblem, ams::AbstractManoptSolverState, k) + return ams.stop(amp, ams, k) end -function stop_solver!(p::AbstractManoptProblem, s::ReturnSolverState, i) - return stop_solver!(p, s.state, i) +function stop_solver!(p::AbstractManoptProblem, s::ReturnSolverState, k) + return stop_solver!(p, s.state, k) end diff --git a/src/solvers/stochastic_gradient_descent.jl b/src/solvers/stochastic_gradient_descent.jl index e5df4b21d6..53137d1f35 100644 --- a/src/solvers/stochastic_gradient_descent.jl +++ b/src/solvers/stochastic_gradient_descent.jl @@ -1,4 +1,3 @@ - """ StochasticGradientDescentState <: AbstractGradientDescentSolverState @@ -7,21 +6,30 @@ see also [`ManifoldStochasticGradientObjective`](@ref) and [`stochastic_gradient # Fields -* `p`: the current iterate -* `direction`: ([`StochasticGradient`](@ref)) a direction update to use -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(1000)`) a [`StoppingCriterion`](@ref) -* `stepsize`: ([`ConstantStepsize`](@ref)`(1.0)`) a [`Stepsize`](@ref) -* `evaluation_order`: (`:Random`) specify whether to use a randomly permuted sequence (`:FixedRandom`), - a per cycle permuted sequence (`:Linear`) or the default `:Random` one. -* `order`: the current permutation -* `retraction_method`: (`default_retraction_method(M, typeof(p))`) a `retraction(M, p, X)` to use. +* $(_field_iterate) +* `direction`: a direction update to use +* $(_field_stop) +* $(_field_step) +* `evaluation_order`: specify whether to use a randomly permuted sequence (`:FixedRandom`:), + a per cycle permuted sequence (`:Linear`) or the default, a `:Random` sequence. +* `order`: stores the current permutation +* $(_field_retr) # Constructor - StochasticGradientDescentState(M, p) + StochasticGradientDescentState(M, p, X; kwargs...) 
Create a `StochasticGradientDescentState` with start point `p`. -all other fields are optional keyword arguments, and the defaults are taken from `M`. + +# Keyword arguments + +* `direction=`[`StochasticGradient`](@ref)`($(_link_zero_vector()))` +* `order_type=:RandomOrder` +* `order=Int[]`: specify how to store the order of indices for the next epoch +* $(_kw_retraction_method_default): $(_kw_retraction_method) +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(1000)`: $(_kw_stopping_criterion) +* `stepsize=`[`default_stepsize`](@ref)`(M, StochasticGradientDescentState)`: $(_kw_stepsize) + This default is the [`ConstantStepsize`](@ref)`(M)` """ mutable struct StochasticGradientDescentState{ TX, @@ -97,6 +105,10 @@ end The default gradient processor, which just evaluates the (stochastic) gradient or a subset thereof. +# Fields + +* `dir::T`: a storage for a tangent vector. + # Constructor StochasticGradient(M::AbstractManifold; p=rand(M), X=zero_vector(M, p)) @@ -125,39 +137,44 @@ function default_stepsize(M::AbstractManifold, ::Type{StochasticGradientDescentS return ConstantStepsize(M) end -@doc raw""" - stochastic_gradient_descent(M, grad_f, p; kwargs...) - stochastic_gradient_descent(M, msgo, p; kwargs...) +_doc_SGD = """ + stochastic_gradient_descent(M, grad_f, p=rand(M); kwargs...) + stochastic_gradient_descent(M, msgo; kwargs...) + stochastic_gradient_descent!(M, grad_f, p; kwargs...) + stochastic_gradient_descent!(M, msgo, p; kwargs...) -perform a stochastic gradient descent +perform a stochastic gradient descent. This can be performed in-place of `p`.
# Input -* `M`: a manifold ``\mathcal M`` -* `grad_f`: a gradient function, that either returns a vector of the subgradients - or is a vector of gradients -* `p`: an initial value ``x ∈ \mathcal M`` +$(_arg_M) +* `grad_f`: a gradient function, that either returns a vector of the gradients + or is a vector of gradient functions +$(_arg_p) alternatively to the gradient you can provide an [`ManifoldStochasticGradientObjective`](@ref) `msgo`, then using the `cost=` keyword does not have any effect since if so, the cost is already within the objective. -# Optional -* `cost`: (`missing`) you can provide a cost function for example to track the function value -* `evaluation`: ([`AllocatingEvaluation`](@ref)) specify whether the gradients works by - allocation (default) form `gradF(M, x)` or [`InplaceEvaluation`](@ref) in place of the form `gradF!(M, X, x)` (elementwise). -* `evaluation_order`: (`:Random`) specify whether to use a randomly permuted sequence (`:FixedRandom`), +# Keyword arguments + +* `cost=missing`: you can provide a cost function for example to track the function value +* `direction=`[`StochasticGradient`](@ref)`($(_link_zero_vector()))` +* $(_kw_evaluation_default): $(_kw_evaluation) +* `evaluation_order=:Random`: specify whether to use a randomly permuted sequence (`:FixedRandom`), a per cycle permuted sequence (`:Linear`) or the default `:Random` one. -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(1000)`) a [`StoppingCriterion`](@ref) -* `stepsize`: ([`ConstantStepsize`](@ref)`(1.0)`) a [`Stepsize`](@ref) -* `order_type`: (`:RandomOder`) a type of ordering of gradient evaluations. +* `order_type=:RandomOrder`: a type of ordering of gradient evaluations. Possible values are `:RandomOrder`, a `:FixedPermutation`, `:LinearOrder` -* `order`: (`[1:n]`) the initial permutation, where `n` is the number of gradients in `gradF`. -* `retraction_method`: (`default_retraction_method(M, typeof(p))`) a retraction to use.
+* `stopping_criterion=`[`StopAfterIteration`](@ref)`(1000)`: $(_kw_stopping_criterion) +* `stepsize=`[`default_stepsize`](@ref)`(M, StochasticGradientDescentState)`: $(_kw_stepsize) +* `order=[1:n]`: the initial permutation, where `n` is the number of gradients in `gradF`. +* $(_kw_retraction_method_default): $(_kw_retraction_method) -# Output +$(_kw_others) -the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details +$(_doc_sec_output) """ + +@doc "$(_doc_SGD)" stochastic_gradient_descent(M::AbstractManifold, args...; kwargs...) function stochastic_gradient_descent(M::AbstractManifold, grad_f; kwargs...) return stochastic_gradient_descent(M, grad_f, rand(M); kwargs...) @@ -206,24 +223,7 @@ function stochastic_gradient_descent( return stochastic_gradient_descent!(M, msgo, q; kwargs...) end -@doc raw""" - stochastic_gradient_descent!(M, grad_f, p) - stochastic_gradient_descent!(M, msgo, p) - -perform a stochastic gradient descent in place of `p`. - -# Input - -* `M`: a manifold ``\mathcal M`` -* `grad_f`: a gradient function, that either returns a vector of the subgradients - or is a vector of gradients -* `p`: an initial value ``p ∈ \mathcal M`` - -Alternatively to the gradient you can provide an [`ManifoldStochasticGradientObjective`](@ref) `msgo`, -then using the `cost=` keyword does not have any effect since if so, the cost is already within the objective. - -for all optional parameters, see [`stochastic_gradient_descent`](@ref). -""" +@doc "$(_doc_SGD)" stochastic_gradient_descent!(::AbstractManifold, args...; kwargs...)
function stochastic_gradient_descent!( M::AbstractManifold, diff --git a/src/solvers/subgradient.jl b/src/solvers/subgradient.jl index ac4d81cfc7..6424c5f1b3 100644 --- a/src/solvers/subgradient.jl +++ b/src/solvers/subgradient.jl @@ -5,20 +5,26 @@ stores option values for a [`subgradient_method`](@ref) solver # Fields -* `retraction_method`: the retraction to use within -* `stepsize`: ([`ConstantStepsize`](@ref)`(M)`) a [`Stepsize`](@ref) -* `stop`: ([`StopAfterIteration`](@ref)`(5000)``)a [`StoppingCriterion`](@ref) -* `p`: (initial or current) value the algorithm is at -* `p_star`: optimal value (initialized to a copy of `p`.) -* `X`: (`zero_vector(M, p)`) the current element from the possible - subgradients at `p` that was last evaluated. +* $(_field_p) +* `p_star`: optimal value +* $(_field_retr) +* $(_field_step) +* $(_field_stop) +* `X`: the current element from the possible subgradients at `p` that was last evaluated. # Constructor SubGradientMethodState(M::AbstractManifold, p; kwargs...) -with keywords for all fields besides `p_star` which obtains the same type as `p`. -You can use `X=` to specify the type of tangent vector to use +Initialise the Subgradient method state to initial point `p` + +# Keyword arguments + +* $(_kw_retraction_method_default): $(_kw_retraction_method) +* `stepsize=`[`default_stepsize`](@ref)`(M, SubGradientMethodState)`, + which here defaults to [`ConstantStepsize`](@ref)`(M)` +* `stopping_criterion=[`StopAfterIteration`](@ref)`(5000)`: $(_kw_stopping_criterion) +* $(_kw_X_default): $(_kw_X) """ mutable struct SubGradientMethodState{ TR<:AbstractRetractionMethod,TS<:Stepsize,TSC<:StoppingCriterion,P,T @@ -78,14 +84,15 @@ function default_stepsize(M::AbstractManifold, ::Type{SubGradientMethodState}) return ConstantStepsize(M) end -@doc raw""" +_doc_SGM = """ subgradient_method(M, f, ∂f, p=rand(M); kwargs...) subgradient_method(M, sgo, p=rand(M); kwargs...) + subgradient_method!(M, f, ∂f, p; kwargs...) 
+ subgradient_method!(M, sgo, p; kwargs...) -perform a subgradient method ``p_{k+1} = \mathrm{retr}(p_k, s_k∂f(p_k))``, +perform a subgradient method ``p^{(k+1)} = $(_l_retr)\\bigl(p^{(k)}, s^{(k)}∂f(p^{(k)})\\bigr)``, +where ``$(_l_retr)`` is a retraction, ``s^{(k)}`` is a step size. -where ``\mathrm{retr}`` is a retraction, ``s_k`` is a step size, usually the -[`ConstantStepsize`](@ref) but also be specified. Though the subgradient might be set valued, the argument `∂f` should always return _one_ element from the subgradient, but not necessarily deterministic. @@ -93,25 +100,21 @@ For more details see [FerreiraOliveira:1998](@cite). # Input -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function ``f:\mathcal M→ℝ`` to minimize -* `∂f`: the (sub)gradient ``∂ f: \mathcal M→ T\mathcal M`` of f - restricted to always only returning one value/element from the subdifferential. - This function can be passed as an allocation function `(M, p) -> X` or - a mutating function `(M, X, p) -> X`, see `evaluation`. -* `p`: (`rand(M)`) an initial value ``p_0=p ∈ \mathcal M`` +$(_arg_M) +$(_arg_f) +* `∂f`: the (sub)gradient ``∂ f: $(_l_M) → T$(_l_M)`` of f +$(_arg_p) alternatively to `f` and `∂f` a [`ManifoldSubgradientObjective`](@ref) `sgo` can be provided. # Optional -* `evaluation`: ([`AllocatingEvaluation`](@ref)) specify whether the subgradient - works by allocation (default) form `∂f(M, y)` or [`InplaceEvaluation`](@ref) in place - of the form `∂f!(M, X, x)`. -* `retraction`: (`default_retraction_method(M, typeof(p))`) a retraction to use. -* `stepsize`: ([`ConstantStepsize`](@ref)`(M)`) specify a [`Stepsize`](@ref) -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(5000)`) - a functor, see[`StoppingCriterion`](@ref), indicating when to stop. 
+* $(_kw_evaluation_default): $(_kw_evaluation) +* $(_kw_retraction_method_default): $(_kw_retraction_method) +* `stepsize=`[`default_stepsize`](@ref)`(M, SubGradientMethodState)`: $(_kw_stepsize) + which here defaults to [`ConstantStepsize`](@ref)`(M)` +* `stopping_criterion=[`StopAfterIteration`](@ref)`(5000)`: $(_kw_stopping_criterion) +* $(_kw_X_default): $(_kw_X) and the ones that are passed to [`decorate_state!`](@ref) for decorators. @@ -119,6 +122,8 @@ and the ones that are passed to [`decorate_state!`](@ref) for decorators. the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details """ + +@doc "$(_doc_SGM)" subgradient_method(::AbstractManifold, args...; kwargs...) function subgradient_method(M::AbstractManifold, f, ∂f; kwargs...) return subgradient_method(M, f, ∂f, rand(M); kwargs...) @@ -156,26 +161,7 @@ function subgradient_method( return subgradient_method!(M, sgo, q; kwargs...) end -@doc raw""" - subgradient_method!(M, f, ∂f, p) - subgradient_method!(M, sgo, p) - -perform a subgradient method ``p_{k+1} = \mathrm{retr}(p_k, s_k∂f(p_k))``, - -# Input - -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function ``f:\mathcal M→ℝ`` to minimize -* `∂f`: the (sub)gradient ``∂f: \mathcal M→ T\mathcal M`` of F - restricted to always only returning one value/element from the subdifferential. - This function can be passed as an allocation function `(M, p) -> X` or - a mutating function `(M, X, p) -> X`, see `evaluation`. -* `p`: an initial value ``p_0=p ∈ \mathcal M`` - -alternatively to `f` and `∂f` a [`ManifoldSubgradientObjective`](@ref) `sgo` can be provided. - -for more details and all optional parameters, see [`subgradient_method`](@ref). -""" +@doc "$(_doc_SGM)" subgradient_method!(M::AbstractManifold, args...; kwargs...) 
function subgradient_method!( M::AbstractManifold, @@ -195,6 +181,7 @@ function subgradient_method!( retraction_method::AbstractRetractionMethod=default_retraction_method(M, typeof(p)), stepsize::Stepsize=default_stepsize(M, SubGradientMethodState), stopping_criterion::StoppingCriterion=StopAfterIteration(5000), + X=zero_vector(M, p), kwargs..., ) where {O<:Union{ManifoldSubgradientObjective,AbstractDecoratedManifoldObjective}} dsgo = decorate_objective!(M, sgo; kwargs...) @@ -205,6 +192,7 @@ function subgradient_method!( stopping_criterion=stopping_criterion, stepsize=stepsize, retraction_method=retraction_method, + X=X, ) dsgs = decorate_state!(sgs; kwargs...) solve!(mp, dsgs) @@ -216,9 +204,9 @@ function initialize_solver!(mp::AbstractManoptProblem, sgs::SubGradientMethodSta sgs.X = zero_vector(M, sgs.p) return sgs end -function step_solver!(mp::AbstractManoptProblem, sgs::SubGradientMethodState, i) +function step_solver!(mp::AbstractManoptProblem, sgs::SubGradientMethodState, k) get_subgradient!(mp, sgs.X, sgs.p) - step = get_stepsize(mp, sgs, i) + step = get_stepsize(mp, sgs, k) M = get_manifold(mp) retract!(M, sgs.p, sgs.p, -step * sgs.X, sgs.retraction_method) (get_cost(mp, sgs.p) < get_cost(mp, sgs.p_star)) && copyto!(M, sgs.p_star, sgs.p) @@ -238,9 +226,9 @@ function (cs::ConstantStepsize)( return s end function (s::DecreasingStepsize)( - amp::AbstractManoptProblem, sgs::SubGradientMethodState, i::Int, args...; kwargs... + amp::AbstractManoptProblem, sgs::SubGradientMethodState, k::Int, args...; kwargs... 
) - ds = (s.length - i * s.subtrahend) * (s.factor^i) / ((i + s.shift)^(s.exponent)) + ds = (s.length - k * s.subtrahend) * (s.factor^k) / ((k + s.shift)^(s.exponent)) if s.type == :absolute ns = norm(get_manifold(amp), get_iterate(sgs), get_subgradient(sgs)) if ns > eps(eltype(ds)) diff --git a/src/solvers/truncated_conjugate_gradient_descent.jl b/src/solvers/truncated_conjugate_gradient_descent.jl index 2af4f1e352..324277e210 100644 --- a/src/solvers/truncated_conjugate_gradient_descent.jl +++ b/src/solvers/truncated_conjugate_gradient_descent.jl @@ -1,34 +1,51 @@ -@doc raw""" +@doc """ TruncatedConjugateGradientState <: AbstractHessianSolverState describe the Steihaug-Toint truncated conjugate-gradient method, with # Fields -a default value is given in brackets if a parameter can be left out in initialization. - -* `Y`: (`zero_vector(M,p)`) Current iterate, whose type is also used for the other, internal, tangent vector fields -* `stop`: a [`StoppingCriterion`](@ref). -* `X`: the gradient ``\operatorname{grad}f(p)``` -* `δ`: the conjugate gradient search direction -* `θ`: (`1.0`) 1+θ is the superlinear convergence target rate. -* `κ`: (`0.1`) the linear convergence target rate. -* `trust_region_radius`: (`injectivity_radius(M)/4`) the trust-region radius -* `residual`: the gradient of the model ``m(Y)`` -* `randomize`: (`false`) -* `project!`: (`copyto!`) for numerical stability it is possible to project onto - the tangent space after every iteration. By default this only copies instead. - -# Internal fields -* `Hδ`, `HY`: temporary results of the Hessian applied to `δ` and `Y`, respectively. +Let `T` denote the type of a tangent vector and `R <: Real`. + +* `δ::T`: the conjugate gradient search direction * `δHδ`, `YPδ`, `δPδ`, `YPδ`: temporary inner products with `Hδ` and preconditioned inner products. 
-* `z`: the preconditioned residual -* `z_r`: inner product of the residual and `z` +* `Hδ`, `HY`: temporary results of the Hessian applied to `δ` and `Y`, respectively. +* `κ::R`: the linear convergence target rate. +* `project!`: for numerical stability it is possible to project onto the tangent space after every iteration. + the function has to work inplace of `Y`, that is `(M, Y, p, X) -> Y`, where `X` and `Y` can be the same memory. +* `randomize`: indicate whether `X` is initialised to a random vector or not +* `residual::T`: the gradient of the model ``m(Y)`` +* $(_field_stop) +* `θ::R`: the superlinear convergence target rate of ``1+θ`` +* `trust_region_radius::R`: the trust-region radius +* `X::T`: the gradient ``$(_l_grad)f(p)`` +* `Y::T`: current iterate tangent vector +* `z::T`: the preconditioned residual +* `z_r::R`: inner product of the residual and `z` # Constructor TruncatedConjugateGradientState(TpM::TangentSpace, Y=rand(TpM); kwargs...) +Initialise the TCG state. + +## Input + +* `TpM`: a [`TangentSpace`](@extref `ManifoldsBase.TangentSpace`) +* `Y`: an initial tangent vector + +## Keyword arguments + +* `κ=0.1` +* `project!::F=copyto!`: initialise the numerical stabilisation to just copy the result +* `randomize=false` +* `θ=1.0` +* `trust_region_radius=`[`injectivity_radius`](@extref `ManifoldsBase.injectivity_radius-Tuple{AbstractManifold}`)`(base_manifold(TpM)) / 4` +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(`$(_link_manifold_dimension("base_manifold(Tpm)"))`)` + $(_sc_any)[`StopWhenResidualIsReducedByFactorOrPower`](@ref)`(; κ=κ, θ=θ)`$(_sc_any)[`StopWhenTrustRegionIsExceeded`](@ref)`()` + $(_sc_any)[`StopWhenCurvatureIsNegative`](@ref)`()`$(_sc_any)[`StopWhenModelIncreased`](@ref)`()`: + $(_kw_stopping_criterion) + # See also [`truncated_conjugate_gradient_descent`](@ref), [`trust_regions`](@ref) @@ -118,20 +135,20 @@ end # # Special stopping Criteria # -@doc raw""" +@doc """ StopWhenResidualIsReducedByFactorOrPower <: 
StoppingCriterion A functor for testing if the norm of residual at the current iterate is reduced either by a power of 1+θ or by a factor κ compared to the norm of the initial residual. The criterion hence reads -``\Vert r_k \Vert_p \leqq \Vert r_0 \Vert_p \min \bigl( \kappa, \Vert r_0 \Vert_p^θ \bigr)``. + +``$(_l_norm("r_k","p")) ≦ $(_l_norm("r_0","p^{(0)}")) \\min \\bigl( κ, $(_l_norm("r_0","p^{(0)}")) \\bigr)``. # Fields * `κ`: the reduction factor * `θ`: part of the reduction power -* `reason`: stores a reason of stopping if the stopping criterion has one be reached, - see [`get_reason`](@ref). +* $(_field_at_iteration) # Constructor @@ -154,17 +171,17 @@ mutable struct StopWhenResidualIsReducedByFactorOrPower{F} <: StoppingCriterion end end function (c::StopWhenResidualIsReducedByFactorOrPower)( - mp::AbstractManoptProblem, tcgstate::TruncatedConjugateGradientState, i::Int + mp::AbstractManoptProblem, tcgstate::TruncatedConjugateGradientState, k::Int ) - if i == 0 # reset on init + if k == 0 # reset on init c.at_iteration = -1 end TpM = get_manifold(mp) M = base_manifold(TpM) p = TpM.point if norm(M, p, tcgstate.residual) <= - tcgstate.initialResidualNorm * min(c.κ, tcgstate.initialResidualNorm^(c.θ)) && i > 0 - c.at_iteration = i + tcgstate.initialResidualNorm * min(c.κ, tcgstate.initialResidualNorm^(c.θ)) && k > 0 + c.at_iteration = k return true end return false @@ -189,6 +206,7 @@ end @doc raw""" update_stopping_criterion!(c::StopWhenResidualIsReducedByFactorOrPower, :ResidualPower, v) + Update the residual Power `θ` to `v`. """ function update_stopping_criterion!( @@ -200,6 +218,7 @@ end @doc raw""" update_stopping_criterion!(c::StopWhenResidualIsReducedByFactorOrPower, :ResidualFactor, v) + Update the residual Factor `κ` to `v`. 
""" function update_stopping_criterion!( @@ -209,16 +228,18 @@ function update_stopping_criterion!( return c end -@doc raw""" +@doc """ StopWhenTrustRegionIsExceeded <: StoppingCriterion A functor for testing if the norm of the next iterate in the Steihaug-Toint truncated conjugate gradient -method is larger than the trust-region radius ``θ \leq \Vert Y_{k}^{*} \Vert_p`` +method is larger than the trust-region radius ``θ ≤ $(_l_norm("Y^{(k)}^{*}","p^{(k)}"))`` and to end the algorithm when the trust region has been left. # Fields -* `reason`: stores a reason of stopping if the stopping criterion has been reached, see [`get_reason`](@ref). +* $(_field_at_iteration) +* `trr` the trust region radius +* `YPY` the computed norm of ``Y``. # Constructor @@ -239,15 +260,15 @@ mutable struct StopWhenTrustRegionIsExceeded{F} <: StoppingCriterion end StopWhenTrustRegionIsExceeded() = StopWhenTrustRegionIsExceeded(0.0) function (c::StopWhenTrustRegionIsExceeded)( - ::AbstractManoptProblem, tcgs::TruncatedConjugateGradientState, i::Int + ::AbstractManoptProblem, tcgs::TruncatedConjugateGradientState, k::Int ) - if i == 0 # reset on init + if k == 0 # reset on init c.at_iteration = -1 end - if tcgs.YPY >= tcgs.trust_region_radius^2 && i >= 0 + if tcgs.YPY >= tcgs.trust_region_radius^2 && k >= 0 c.YPY = tcgs.YPY c.trr = tcgs.trust_region_radius - c.at_iteration = i + c.at_iteration = k return true end return false @@ -267,15 +288,18 @@ function show(io::IO, c::StopWhenTrustRegionIsExceeded) return print(io, "StopWhenTrustRegionIsExceeded()\n $(status_summary(c))") end -@doc raw""" +@doc """ StopWhenCurvatureIsNegative <: StoppingCriterion A functor for testing if the curvature of the model is negative, -``⟨δ_k, \operatorname{Hess}[F](\delta_k)⟩_p ≦ 0``. +``⟨δ_k, $(_l_Hess) F(p)[δ_k]⟩_p ≦ 0``. In this case, the model is not strictly convex, and the stepsize as computed does not yield a reduction of the model. 
# Fields + +* $(_field_at_iteration) +* `value` store the value of the inner product. * `reason`: stores a reason of stopping if the stopping criterion has been reached, see [`get_reason`](@ref). @@ -294,14 +318,14 @@ end StopWhenCurvatureIsNegative() = StopWhenCurvatureIsNegative(0.0) StopWhenCurvatureIsNegative(v::R) where {R<:Real} = StopWhenCurvatureIsNegative{R}(v, -1) function (c::StopWhenCurvatureIsNegative)( - ::AbstractManoptProblem, tcgs::TruncatedConjugateGradientState, i::Int + ::AbstractManoptProblem, tcgs::TruncatedConjugateGradientState, k::Int ) - if i == 0 # reset on init + if k == 0 # reset on init c.at_iteration = -1 end - if tcgs.δHδ <= 0 && i > 0 + if tcgs.δHδ <= 0 && k > 0 c.value = tcgs.δHδ - c.at_iteration = i + c.at_iteration = k return true end return false @@ -321,14 +345,16 @@ function show(io::IO, c::StopWhenCurvatureIsNegative) return print(io, "StopWhenCurvatureIsNegative()\n $(status_summary(c))") end -@doc raw""" +@doc """ StopWhenModelIncreased <: StoppingCriterion A functor for testing if the curvature of the model value increased. # Fields -* `reason`: stores a reason of stopping if the stopping criterion has been reached, - see [`get_reason`](@ref). 
+ +* $(_field_at_iteration) +* `model_value`stre the last model value +* `inc_model_value` store the model value that increased # Constructor @@ -345,16 +371,16 @@ mutable struct StopWhenModelIncreased{F} <: StoppingCriterion end StopWhenModelIncreased() = StopWhenModelIncreased(-1, Inf, Inf) function (c::StopWhenModelIncreased)( - ::AbstractManoptProblem, tcgs::TruncatedConjugateGradientState, i::Int + ::AbstractManoptProblem, tcgs::TruncatedConjugateGradientState, k::Int ) - if i == 0 # reset on init + if k == 0 # reset on init c.at_iteration = -1 c.model_value = Inf c.inc_model_value = Inf end - if i > 0 && (tcgs.model_value > c.model_value) + if k > 0 && (tcgs.model_value > c.model_value) c.inc_model_value = tcgs.model_value - c.at_iteration = i + c.at_iteration = k return true end c.model_value = tcgs.model_value @@ -375,7 +401,16 @@ function show(io::IO, c::StopWhenModelIncreased) return print(io, "StopWhenModelIncreased()\n $(status_summary(c))") end -@doc raw""" +_doc_TCG_subproblem = raw""" +```math +\begin{align*} +\operatorname*{arg\,min}_{Y ∈ T_p\mathcal{M}}&\ m_p(Y) = f(p) + +⟨\operatorname{grad}f(p), Y⟩_p + \frac{1}{2} ⟨\mathcal{H}_p[Y], Y⟩_p\\ +\text{such that}& \ \lVert Y \rVert_p ≤ Δ +\end{align*} +``` +""" +_doc_TCGD = """ truncated_conjugate_gradient_descent(M, f, grad_f, p; kwargs...) truncated_conjugate_gradient_descent(M, f, grad_f, p, X; kwargs...) truncated_conjugate_gradient_descent(M, f, grad_f, Hess_f; kwargs...) @@ -383,58 +418,63 @@ end truncated_conjugate_gradient_descent(M, f, grad_f, Hess_f, p, X; kwargs...) truncated_conjugate_gradient_descent(M, mho::ManifoldHessianObjective, p, X; kwargs...) truncated_conjugate_gradient_descent(M, trmo::TrustRegionModelObjective, p, X; kwargs...) + truncated_conjugate_gradient_descent!(M, f, grad_f, Hess_f, p, X; kwargs...) + truncated_conjugate_gradient_descent!(M, f, grad_f, p, X; kwargs...) + truncated_conjugate_gradient_descent!(M, mho::ManifoldHessianObjective, p, X; kwargs...) 
+ truncated_conjugate_gradient_descent!(M, trmo::TrustRegionModelObjective, p, X; kwargs...) solve the trust-region subproblem -```math -\begin{align*} -\operatorname*{arg\,min}_{Y ∈ T_p\mathcal{M}}&\ m_p(Y) = f(p) + -⟨\operatorname{grad}f(p), Y⟩_p + \frac{1}{2} ⟨\mathcal{H}_p[Y], Y⟩_p\\ -\text{such that}& \ \lVert Y \rVert_p ≤ Δ -\end{align*} -``` +$(_doc_TCG_subproblem) + +on a manifold ``$(_l_M)`` by using the Steihaug-Toint truncated conjugate-gradient (tCG) method. +This can be done inplace of `X`. -on a manifold M by using the Steihaug-Toint truncated conjugate-gradient (tCG) method. For a description of the algorithm and theorems offering convergence guarantees, see [AbsilBakerGallivan:2006, ConnGouldToint:2000](@cite). # Input -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function ``f: \mathcal M → ℝ`` to minimize -* `grad_f`: the gradient ``\operatorname{grad}f: \mathcal M → T\mathcal M`` of `F` -* `Hess_f`: (optional, cf. [`ApproxHessianFiniteDifference`](@ref)) the Hessian ``\operatorname{Hess}f: T_p\mathcal M → T_p\mathcal M``, ``X ↦ \operatorname{Hess}F(p)[X] = ∇_X\operatorname{grad}f(p)`` -* `p`: a point on the manifold ``p ∈ \mathcal M`` -* `X`: an initial tangential vector ``X ∈ T_p\mathcal M`` +$(_arg_M) +$(_arg_f) +$(_arg_grad_f) +$(_arg_Hess_f) +$(_arg_p) +$(_arg_X) Instead of the three functions, you either provide a [`ManifoldHessianObjective`](@ref) `mho` which is then used to build the trust region model, or a [`TrustRegionModelObjective`](@ref) `trmo` directly. -# Optional - -* `evaluation`: ([`AllocatingEvaluation`](@ref)) specify whether the gradient and Hessian work by - allocation (default) or [`InplaceEvaluation`](@ref) in place -* `preconditioner`: a preconditioner for the Hessian H -* `θ`: (`1.0`) 1+θ is the superlinear convergence target rate. -* `κ`: (`0.1`) the linear convergence target rate. -* `randomize`: set to true if the trust-region solve is initialized to a random tangent vector. 
+# Keyword arguments + +* $(_kw_evaluation_default): $(_kw_evaluation) +* `preconditioner`: a preconditioner for the Hessian H. + This is either an allocating function `(M, p, X) -> Y` or an in-place function `(M, Y, p, X) -> Y`, + see `evaluation`, and by default set to the identity. +* `θ=1.0`: the superlinear convergence target rate of ``1+θ`` +* `κ=0.1`: the linear convergence target rate. +* `project!=copyto!`: for numerical stability it is possible to project onto the tangent space after every iteration. + the function has to work inplace of `Y`, that is `(M, Y, p, X) -> Y`, where `X` and `Y` can be the same memory. +* `randomize=false`: indicate whether `X` is initialised to a random vector or not. This disables preconditioning. -* `trust_region_radius`: (`injectivity_radius(M)/4`) a trust-region radius -* `project!`: (`copyto!`) for numerical stability it is possible to project onto - the tangent space after every iteration. By default this only copies instead. -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(manifol_dimension(M)) | `[`StopWhenResidualIsReducedByFactorOrPower`](@ref)`(;κ=κ, θ=θ) | `[`StopWhenCurvatureIsNegative`](@ref)`() | `[`StopWhenTrustRegionIsExceeded`](@ref)`() | `[`StopWhenModelIncreased`](@ref)`()`) - a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop, - -and the ones that are passed to [`decorate_state!`](@ref) for decorators. 
+* $(_kw_retraction_method_default): $(_kw_retraction_method) +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(`$(_link_manifold_dimension("base_manifold(Tpm)"))`)` + $(_sc_any)[`StopWhenResidualIsReducedByFactorOrPower`](@ref)`(; κ=κ, θ=θ)`$(_sc_any)[`StopWhenTrustRegionIsExceeded`](@ref)`()` + $(_sc_any)[`StopWhenCurvatureIsNegative`](@ref)`()`$(_sc_any)[`StopWhenModelIncreased`](@ref)`()`: + $(_kw_stopping_criterion) +* `trust_region_radius=`[`injectivity_radius`](@extref `ManifoldsBase.injectivity_radius-Tuple{AbstractManifold}`)`(M) / 4`: the initial trust-region radius -# Output +$(_kw_others) -the obtained (approximate) minimizer ``Y^*``, see [`get_solver_return`](@ref) for details +$(_doc_sec_output) # See also + [`trust_regions`](@ref) """ + +@doc "$(_doc_TCGD)" truncated_conjugate_gradient_descent(M::AbstractManifold, args; kwargs...) # No Hessian, no point/vector function truncated_conjugate_gradient_descent(M::AbstractManifold, f, grad_f; kwargs...) @@ -569,22 +609,7 @@ function truncated_conjugate_gradient_descent( return truncated_conjugate_gradient_descent!(M, mho, q, Y; kwargs...) end -@doc raw""" - truncated_conjugate_gradient_descent!(M, f, grad_f, Hess_f, p, X; kwargs...) - truncated_conjugate_gradient_descent!(M, f, grad_f, p, X; kwargs...) - -solve the trust-region subproblem in place of `X` (and `p`). - -# Input -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function ``F: \mathcal M → ℝ`` to minimize -* `grad_f`: the gradient ``\operatorname{grad}f: \mathcal M → T\mathcal M`` of `f` -* `Hess_f`: the Hessian ``\operatorname{Hess}f(x): T_p\mathcal M → T_p\mathcal M``, ``X ↦ \operatorname{Hess}f(p)[X]`` -* `p`: a point on the manifold ``p ∈ \mathcal M`` -* `X`: an update tangential vector ``X ∈ T_x\mathcal M`` - -For more details and all optional arguments, see [`truncated_conjugate_gradient_descent`](@ref). -""" +@doc "$(_doc_TCGD)" truncated_conjugate_gradient_descent!(M::AbstractManifold, args...; kwargs...) 
# no Hessian function truncated_conjugate_gradient_descent!( @@ -676,9 +701,6 @@ function truncated_conjugate_gradient_descent!( return get_solver_return(get_objective(mp), dtcgs) end -# -# Maybe these could be improved a bit in readability some time -# function initialize_solver!( mp::AbstractManoptProblem, tcgs::TruncatedConjugateGradientState ) diff --git a/src/solvers/trust_regions.jl b/src/solvers/trust_regions.jl index 0a8f671314..3fb55db809 100644 --- a/src/solvers/trust_regions.jl +++ b/src/solvers/trust_regions.jl @@ -1,68 +1,67 @@ -@doc raw""" +@doc """ TrustRegionsState <: AbstractHessianSolverState Store the state of the trust-regions solver. # Fields -All the following fields (besides `p`) can be set by specifying them as keywords. - -* `acceptance_rate`: (`0.1`) a lower bound of the performance ratio for the iterate +* `acceptance_rate`: a lower bound of the performance ratio for the iterate that decides if the iteration is accepted or not. -* `max_trust_region_radius`: (`sqrt(manifold_dimension(M))`) the maximum trust-region radius -* `p`: (`rand(M)` if a manifold is provided) the current iterate -* `project!`: (`copyto!`) specify a projection operation for tangent vectors - for numerical stability. A function `(M, Y, p, X) -> ...` working in place of `Y`. - per default, no projection is performed, set it to `project!` to activate projection. -* `stop`: ([`StopAfterIteration`](@ref)`(1000) | `[`StopWhenGradientNormLess`](@ref)`(1e-6)`) -* `randomize`: (`false`) indicates if the trust-region solve is to be initiated with a - random tangent vector. If set to true, no preconditioner is used. This option is set to true - in some scenarios to escape saddle points, but is otherwise seldom activated. 
-* `ρ_regularization`: (`10000.0`) regularize the model fitness ``ρ`` to avoid division by zero
-* `sub_problem`: an [`AbstractManoptProblem`](@ref) problem or a function `(M, p, X) -> q` or `(M, q, p, X)` for the a closed form solution of the sub problem
-* `sub_state`: ([`TruncatedConjugateGradientState`](@ref)`(M, p, X)`)
-* `σ`: (`0.0` or `1e-6` depending on `randomize`) Gaussian standard deviation when creating the random initial tangent vector
-* `trust_region_radius`: (`max_trust_region_radius / 8`) the (initial) trust-region radius
-* `X`: (`zero_vector(M,p)`) the current gradient `grad_f(p)`
-  Use this default to specify the type of tangent vector to allocate also for the internal (tangent vector) fields.
-
-# Internal fields
-
-* `HX`, `HY`, `HZ`: interim storage (to avoid allocation) of ``\operatorname{Hess} f(p)[\cdot]` of `X`, `Y`, `Z`
+* `HX`, `HY`, `HZ`: interim storage (to avoid allocation) of ``$(_l_Hess) f(p)[⋅]`` of `X`, `Y`, `Z`
+* `max_trust_region_radius`: the maximum trust-region radius
+* $(_field_p)
+* `project!`: for numerical stability it is possible to project onto the tangent space after every iteration.
+  the function has to work inplace of `Y`, that is `(M, Y, p, X) -> Y`, where `X` and `Y` can be the same memory.
+* $(_field_stop)
+* `randomize`: indicate whether `X` is initialised to a random vector or not
+* `ρ_regularization`: regularize the model fitness ``ρ`` to avoid division by zero
+* $_field_sub_problem
+* $_field_sub_state
+* `σ`: Gaussian standard deviation when creating the random initial tangent vector
+  This field has no effect, when `randomize` is false.
+* `trust_region_radius`: the trust-region radius
+* $(_field_X)
 * `Y`: the solution (tangent vector) of the subsolver
 * `Z`: the Cauchy point (only used if random is activated)
 
 # Constructors
 
-All the following constructors have the fields as keyword arguments with the defaults
-given in brackets.
If no initial point `p` is provided, `p=rand(M)` is used - TrustRegionsState(M, mho; kwargs...) TrustRegionsState(M, p, mho; kwargs...) - -A trust region state, where the sub problem is set to a [`DefaultManoptProblem`](@ref) on the -tangent space using the [`TrustRegionModelObjective`](@ref) to be solved with [`truncated_conjugate_gradient_descent!`](@ref) -or in other words the sub state is set to [`TruncatedConjugateGradientState`](@ref). - TrustRegionsState(M, sub_problem, sub_state; kwargs...) TrustRegionsState(M, p, sub_problem, sub_state; kwargs...) - -A trust region state, where the sub problem is solved using a [`AbstractManoptProblem`](@ref) `sub_problem` -and an [`AbstractManoptSolverState`](@ref) `sub_state`. - TrustRegionsState(M, f::Function; evaluation=AllocatingEvaluation, kwargs...) TrustRegionsState(M, p, f; evaluation=AllocatingEvaluation, kwargs...) -A trust region state, where the sub problem is solved in closed form by a function -`f(M, p, Y, Δ)`, where `p` is the current iterate, `Y` the initial tangent vector at `p` and -`Δ` the current trust region radius. +# Input + +$(_arg_M) +$(_arg_p) + +as well as either +* an [`ManifoldHessianObjective`](@ref) `mho`, then `sub_state` and `sub_problem` are filled with a default (deprecated). +* a `sub_problem` and a `sub_state` +* a function `f` and its `evaluation` as a closed form solution for the sub solver. 
+ +## Keyword arguments + +* `acceptance_rate=0.1` +* `max_trust_region_radius=sqrt(manifold_dimension(M))` +* `project!=copyto!` +* `stopping_criterion=`[`StopAfterIteration`](@ref)`(1000)`$(_sc_any)[`StopWhenGradientNormLess`](@ref)`(1e-6)`: + $(_kw_stopping_criterion) +* `randomize=false` +* `ρ_regularization=10000.0` +* `θ=1.0` +* `trust_region_radius=max_trust_region_radius / 8` +* $(_kw_X_default): $(_kw_X) # See also -[`trust_regions`](@ref), [`trust_regions!`](@ref) +[`trust_regions`](@ref) """ mutable struct TrustRegionsState{ P, @@ -274,9 +273,11 @@ function show(io::IO, trs::TrustRegionsState) return print(io, s) end -@doc raw""" - trust_regions(M, f, grad_f, hess_f, p=rand(M)) - trust_regions(M, f, grad_f, p=rand(M)) +_doc_TR = """ + trust_regions(M, f, grad_f, Hess_f, p=rand(M); kwargs...) + trust_regions(M, f, grad_f, p=rand(M); kwargs...) + trust_regions!(M, f, grad_f, Hess_f, p; kwargs...) + trust_regions!(M, f, grad_f, p; kwargs...) run the Riemannian trust-regions solver for optimization on manifolds to minimize `f`, see on [AbsilBakerGallivan:2006, ConnGouldToint:2000](@cite). @@ -287,57 +288,62 @@ For solving the inner trust-region subproblem of finding an update-vector, by default the [`truncated_conjugate_gradient_descent`](@ref) is used. 
# Input -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function ``f : \mathcal M → ℝ`` to minimize -* `grad_f`: the gradient ``\operatorname{grad}F : \mathcal M → T \mathcal M`` of ``F`` -* `Hess_f`: (optional), the Hessian ``\operatorname{Hess}F(x): T_x\mathcal M → T_x\mathcal M``, ``X ↦ \operatorname{Hess}F(x)[X] = ∇_ξ\operatorname{grad}f(x)`` -* `p`: (`rand(M)`) an initial value ``x ∈ \mathcal M`` + +$(_arg_M) +$(_arg_f) +$(_arg_grad_f) +$(_arg_Hess_f) +$(_arg_p) # Keyword arguments -* `acceptance_rate`: Accept/reject threshold: if ρ (the performance ratio for the iterate) +* `acceptance_rate`: accept/reject threshold: if ρ (the performance ratio for the iterate) is at least the acceptance rate ρ', the candidate is accepted. This value should be between ``0`` and ``\frac{1}{4}`` -* `augmentation_threshold`: (`0.75`) trust-region augmentation threshold: if ρ is larger than this threshold, +* `augmentation_threshold=0.75`: trust-region augmentation threshold: if ρ is larger than this threshold, a solution is on the trust region boundary and negative curvature, and the radius is extended (augmented) -* `augmentation_factor`: (`2.0`) trust-region augmentation factor -* `evaluation`: ([`AllocatingEvaluation`](@ref)) specify whether the gradient - and Hessian work by allocation (default) or [`InplaceEvaluation`](@ref) in place -* `κ`: (`0.1`) the linear convergence target rate of the tCG method +* `augmentation_factor=2.0`: trust-region augmentation factor +* $(_kw_evaluation_default): $(_kw_evaluation) +* `κ=0.1`: the linear convergence target rate of the tCG method [`truncated_conjugate_gradient_descent`](@ref), and is used in a stopping criterion therein * `max_trust_region_radius`: the maximum trust-region radius -* `preconditioner`: a preconditioner (a symmetric, positive definite operator - that should approximate the inverse of the Hessian) -* `project!`; (`copyto!`) specify a projection operation for tangent vectors - within the subsolver for numerical 
stability. The required form is `(M, Y, p, X) -> ...` working in place of `Y`. -* `randomize`; set to true if the trust-region solve is to be initiated with a - random tangent vector and no preconditioner is used. -* `ρ_regularization`: (`1e3`) regularize the performance evaluation ``ρ`` to avoid numerical inaccuracies. -* `reduction_factor`: (`0.25`) trust-region reduction factor -* `reduction_threshold`: (`0.1`) trust-region reduction threshold: if ρ is below this threshold, +* `preconditioner`: a preconditioner for the Hessian H. + This is either an allocating function `(M, p, X) -> Y` or an in-place function `(M, Y, p, X) -> Y`, + see `evaluation`, and by default set to the identity. +* `project!=copyto!`: for numerical stability it is possible to project onto the tangent space after every iteration. + the function has to work inplace of `Y`, that is `(M, Y, p, X) -> Y`, where `X` and `Y` can be the same memory. +* `randomize=false`: indicate whether `X` is initialised to a random vector or not. + This disables preconditioning. +* `ρ_regularization=1e3`: regularize the performance evaluation ``ρ`` to avoid numerical inaccuracies. +* `reduction_factor=0.25`: trust-region reduction factor +* `reduction_threshold=0.1`: trust-region reduction threshold: if ρ is below this threshold, the trust region radius is reduced by `reduction_factor`. -* `retraction` (`default_retraction_method(M, typeof(p))`) a retraction to use -* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(1000) | `[`StopWhenGradientNormLess`](@ref)`(1e-6)`) a functor inheriting - from [`StoppingCriterion`](@ref) indicating when to stop. -* `sub_kwargs`: keyword arguments passed to the sub state and used to decorate the sub options -* `sub_stopping_criterion`: a stopping criterion for the sub solver, uses the same standard as TCG. 
-* `sub_problem`: ([`DefaultManoptProblem`](@ref)`(M, `[`ConstrainedManifoldObjective`](@ref)`(subcost, subgrad; evaluation=evaluation))`) problem for the subsolver
-* `sub_state`: ([`QuasiNewtonState`](@ref)) using [`QuasiNewtonLimitedMemoryDirectionUpdate`](@ref)
-  with [`InverseBFGS`](@ref) and `sub_stopping_criterion` as a stopping criterion. See also `sub_kwargs`.
-* `θ`: (`1.0`) 1+θ is the superlinear convergence target rate of the tCG-method
+* $(_kw_retraction_method_default): $(_kw_retraction_method)
+* `stopping_criterion=`[`StopAfterIteration`](@ref)`(1000)`$(_sc_any)[`StopWhenGradientNormLess`](@ref)`(1e-6)`:
+  $(_kw_stopping_criterion)
+* $(_kw_sub_kwargs_default): $(_kw_sub_kwargs)
+* `sub_stopping_criterion` – the default from [`truncated_conjugate_gradient_descent`](@ref):
+  $(_kw_stopping_criterion)
+* `sub_problem=`[`DefaultManoptProblem`](@ref)`(M, `[`ConstrainedManifoldObjective`](@ref)`(subcost, subgrad; evaluation=evaluation))`:
+  problem for the subsolver
+* `sub_state=`[`QuasiNewtonState`](@ref) using [`QuasiNewtonLimitedMemoryDirectionUpdate`](@ref) with [`InverseBFGS`](@ref) and `sub_stopping_criterion` as a stopping criterion.
+  See also `sub_kwargs=`.
+* `θ=1.0`: the superlinear convergence target rate of ``1+θ`` of the tCG-method
   [`truncated_conjugate_gradient_descent`](@ref), and is used in a stopping criterion therein
-* `trust_region_radius`: the initial trust-region radius
+* `trust_region_radius=`[`injectivity_radius`](@extref `ManifoldsBase.injectivity_radius-Tuple{AbstractManifold}`)`(M) / 4`: the initial trust-region radius
 
 For the case that no Hessian is provided, the Hessian is computed using finite difference, see
 [`ApproxHessianFiniteDifference`](@ref).
 
-# Output
+$(_kw_others)
 
-the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details
+$(_doc_sec_output)
 
 # See also
 [`truncated_conjugate_gradient_descent`](@ref)
 """
+
+@doc "$(_doc_TR)"
 trust_regions(M::AbstractManifold, args...; kwargs...)
# Hessian (Function) but no point function trust_regions( @@ -426,26 +432,10 @@ function trust_regions( q = copy(M, p) return trust_regions!(M, mho, q; kwargs...) end -# If the Hessian go automatically filled already _and_ p is a number -@doc raw""" - trust_regions!(M, f, grad_f, Hess_f, p; kwargs...) - trust_regions!(M, f, grad_f, p; kwargs...) - -evaluate the Riemannian trust-regions solver in place of `p`. -# Input -* `M`: a manifold ``\mathcal M`` -* `f`: a cost function ``f: \mathcal M → ℝ`` to minimize -* `grad_f`: the gradient ``\operatorname{grad}f: \mathcal M → T \mathcal M`` of ``F`` -* `Hess_f`: (optional) the Hessian ``\operatorname{Hess} f`` -* `p`: an initial value ``p ∈ \mathcal M`` - -For the case that no Hessian is provided, the Hessian is computed using finite difference, see -[`ApproxHessianFiniteDifference`](@ref). - -for more details and all options, see [`trust_regions`](@ref) -""" +@doc "$(_doc_TR)" trust_regions!(M::AbstractManifold, args...; kwargs...) + # No Hessian but a point (Any) function trust_regions!( M::AbstractManifold, @@ -589,7 +579,7 @@ function initialize_solver!(mp::AbstractManoptProblem, trs::TrustRegionsState) return trs end -function step_solver!(mp::AbstractManoptProblem, trs::TrustRegionsState, i) +function step_solver!(mp::AbstractManoptProblem, trs::TrustRegionsState, k) M = get_manifold(mp) mho = get_objective(mp) # Determine the initial tangent vector used as start point for the subsolvereta0 diff --git a/test/plans/test_conjugate_gradient_plan.jl b/test/plans/test_conjugate_gradient_plan.jl index 4e270cc5bd..14d658fd07 100644 --- a/test/plans/test_conjugate_gradient_plan.jl +++ b/test/plans/test_conjugate_gradient_plan.jl @@ -1,7 +1,7 @@ using Manopt, Manifolds, Test struct DummyCGCoeff <: DirectionUpdateRule end -(u::DummyCGCoeff)(p, s, i) = 0.2 +(u::DummyCGCoeff)(p, s, k) = 0.2 Manopt.update_rule_storage_points(::DummyCGCoeff) = Tuple{} Manopt.update_rule_storage_vectors(::DummyCGCoeff) = Tuple{} diff --git 
a/test/plans/test_conjugate_residual_plan.jl b/test/plans/test_conjugate_residual_plan.jl index 65b0d13017..a60ffa8d30 100644 --- a/test/plans/test_conjugate_residual_plan.jl +++ b/test/plans/test_conjugate_residual_plan.jl @@ -23,8 +23,8 @@ using Manifolds, Manopt, Test @test get_cost(TpM, slso, X0) ≈ cost_value @test get_cost(TpM, slso2, X0) ≈ cost_value - @test Manopt.get_b(TpM, slso, X0) == bv - @test Manopt.get_b(TpM, slso2, X0) == bv + @test Manopt.get_b(TpM, slso) == bv + @test Manopt.get_b(TpM, slso2) == bv @test get_gradient(TpM, slso, X0) == grad_value @test get_gradient(TpM, slso2, X0) == grad_value diff --git a/test/plans/test_stopping_criteria.jl b/test/plans/test_stopping_criteria.jl index 8fa26fa11b..156a24f212 100644 --- a/test/plans/test_stopping_criteria.jl +++ b/test/plans/test_stopping_criteria.jl @@ -274,7 +274,7 @@ struct DummyStoppingCriterion <: StoppingCriterion end s.p = p @test sc1(mp, s, 1) #always returns true since `f` is always NaN s.p = [0.0, 0.1] - @test !sc1(mp, s, 0) # test reset – triggers again + @test !sc1(mp, s, 0) # test reset. triggers again @test length(get_reason(sc1)) == 0 @test sc1.at_iteration == -1 # Trigger manually @@ -287,7 +287,7 @@ struct DummyStoppingCriterion <: StoppingCriterion end @test sc2(mp, s, 1) #always returns true since p was now set to NaN @test length(get_reason(sc2)) > 0 s.p = p - @test !sc2(mp, s, 0) # test reset, though this als already triggers + @test !sc2(mp, s, 0) # test reset, though this already again triggers @test length(get_reason(sc2)) == 0 # verify reset @test sc2.at_iteration == -1 # Trigger manually diff --git a/test/solvers/test_ChambollePock.jl b/test/solvers/test_ChambollePock.jl index 532b491db1..5c9c4cd976 100644 --- a/test/solvers/test_ChambollePock.jl +++ b/test/solvers/test_ChambollePock.jl @@ -84,7 +84,7 @@ using ManifoldDiff: prox_distance, prox_distance! 
linearized_forward_operator=DΛ, relax=:dual, variant=:linearized, - update_dual_base=(p, o, i) -> o.n, + update_dual_base=(p, o, k) -> o.n, ) @test o2a ≈ o3 end diff --git a/test/solvers/test_adaptive_regularization_with_cubics.jl b/test/solvers/test_adaptive_regularization_with_cubics.jl index 8b6a65589f..2ad5862532 100644 --- a/test/solvers/test_adaptive_regularization_with_cubics.jl +++ b/test/solvers/test_adaptive_regularization_with_cubics.jl @@ -224,7 +224,8 @@ include("../utils/example_tasks.jl") ) @test isapprox(M, p_min, q3) - # test that we do not het nan if we start at the minimizer + # test that this still returns the minimizer, that is when starting + # at the minimizer r1 = adaptive_regularization_with_cubics(M, f, grad_f, Hess_f, p_min) end diff --git a/tutorials/HowToDebug.qmd b/tutorials/HowToDebug.qmd index 1683140aaf..9dcc6b1297 100644 --- a/tutorials/HowToDebug.qmd +++ b/tutorials/HowToDebug.qmd @@ -184,8 +184,8 @@ mutable struct DebugDivider{TIO<:IO} <: DebugAction divider::String DebugDivider(divider=" | "; io::IO=stdout) = new{typeof(io)}(io, divider) end -function (d::DebugDivider)(::AbstractManoptProblem, ::AbstractManoptSolverState, i::Int) - (i >= 0) && (!isempty(d.divider)) && (print(d.io, d.divider)) +function (d::DebugDivider)(::AbstractManoptProblem, ::AbstractManoptSolverState, k::Int) + (k >= 0) && (!isempty(d.divider)) && (print(d.io, d.divider)) return nothing end ``` diff --git a/tutorials/ImplementASolver.qmd b/tutorials/ImplementASolver.qmd index 7c89b38dcb..51e0840cb5 100644 --- a/tutorials/ImplementASolver.qmd +++ b/tutorials/ImplementASolver.qmd @@ -118,7 +118,7 @@ the retraction based on a given manifold. 
#| output: false function RandomWalkState(M::AbstractManifold, p::P=rand(M); σ = 0.1, - retraction_method::R=default_retraction_method(M), + retraction_method::R=default_retraction_method(M, typeof(p)), stopping_criterion::S=StopAfterIteration(200) ) where {P, R<:AbstractRetractionMethod, S<:StoppingCriterion} return RandomWalkState{P,R,S}(p, copy(M, p), σ, retraction_method, stopping_criterion)