Merge pull request #83 from chrhansk/feature-box-solver

Add custom box-constrained solver
chrhansk · May 14, 2024 · c238bdb · c238bdb
2 parents 31f7533 + ab27eb0
commit c238bdb
Show file tree

Hide file tree

Showing 4 changed files with 220 additions and 24 deletions.
diff --git a/docs/conf.py b/docs/conf.py
@@ -14,7 +14,7 @@
 project = "pygradflow"
 copyright = "2023, Christoph Hansknecht"
 author = "Christoph Hansknecht"
-release = "0.4.13"
+release = "0.4.14"
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

diff --git a/pygradflow/step/box_control.py b/pygradflow/step/box_control.py
@@ -1,7 +1,6 @@
-import time
-
 import cyipopt
 import numpy as np
+import scipy as sp
 from scipy.optimize import Bounds, minimize
 
 from pygradflow.implicit_func import ImplicitFunc
@@ -12,11 +11,6 @@
     StepSolverError,
 )
 
-max_num_it = 10000
-max_num_linesearch_it = 40
-t_init = 1.0
-beta = 0.5
-
 
 class BoxReducedProblem(cyipopt.Problem):
     """
@@ -63,13 +57,37 @@ def constraint(self, x):
     def jacobian(self, x):
         return np.empty((0, self.num_vars))
 
+    def hessianstructure(self):
+        x0 = self.x0
+        hess = self.step_controller.hessian(self.iterate, x0, self.lamb, self.rho)
+        hess = hess.tocoo()
+        rows = hess.row
+        cols = hess.col
+
+        hess_filter = rows >= cols
+
+        return rows[hess_filter], cols[hess_filter]
+
+    def hessian(self, x, lag, obj_factor):
+        hess = self.step_controller.hessian(self.iterate, x, self.lamb, self.rho)
+        hess = hess.tocoo()
+
+        rows = hess.row
+        cols = hess.col
+        data = hess.data
+
+        hess_filter = rows >= cols
+
+        return data[hess_filter]
+
     def set_options(self, timer):
         import logging
 
         logging.getLogger("cyipopt").setLevel(logging.WARNING)
         self.add_option("print_level", 0)
-        # self.add_option("derivative_test", "first-order")
-        self.add_option("hessian_approximation", "limited-memory")
+        # self.add_option("derivative_test", "second-order")
+        # self.add_option("hessian_approximation", "limited-memory")
+        # self.add_option("max_iter", 10)
 
         remaining = timer.remaining()
 
@@ -78,11 +96,13 @@ def set_options(self, timer):
         elif np.isfinite(remaining):
             self.add_option("max_cpu_time", remaining)
 
-    def solve(self, timer):
-        iterate = self.iterate
+    @property
+    def x0(self):
+        return self.iterate.x
 
+    def solve(self, timer):
         self.set_options(timer)
-        x0 = iterate.x
+        x0 = self.x0
 
         # Solve using Ipopt
         x, info = super().solve(x0)
@@ -96,11 +116,12 @@ def solve(self, timer):
 class BoxReducedController(StepController):
 
     def objective(self, iterate, x, lamb, rho):
+        eval = iterate.eval
         xhat = iterate.x
         yhat = iterate.y
 
-        obj = self.problem.obj(x)
-        cons = self.problem.cons(x)
+        obj = eval.obj(x)
+        cons = eval.cons(x)
 
         dx = x - xhat
         dx_norm_sq = np.dot(dx, dx)
@@ -115,18 +136,36 @@ def objective(self, iterate, x, lamb, rho):
         return curr_obj
 
     def gradient(self, iterate, x, lamb, rho):
+        eval = iterate.eval
         xhat = iterate.x
         yhat = iterate.y
 
-        obj_grad = self.problem.obj_grad(x)
-        cons = self.problem.cons(x)
-        cons_jac = self.problem.cons_jac(x)
+        obj_grad = eval.obj_grad(x)
+        cons = eval.cons(x)
+        cons_jac = eval.cons_jac(x)
 
         dx = x - xhat
         cons_jac_factor = (rho + (1 / lamb)) * cons + yhat
         return obj_grad + lamb * dx + cons_jac.T.dot(cons_jac_factor)
 
-    def solve_step(self, iterate, rho, dt, timer):
+    def hessian(self, iterate, x, lamb, rho):
+        eval = iterate.eval
+        (n,) = x.shape
+
+        yhat = iterate.y
+
+        cons = eval.cons(x)
+        jac = eval.cons_jac(x)
+        cons_factor = 1 / lamb + rho
+        y = cons_factor * cons + yhat
+
+        hess = eval.lag_hess(x, y)
+        hess += sp.sparse.diags([lamb], shape=(n, n))
+        hess += cons_factor * (jac.T @ jac)
+
+        return hess
+
+    def solve_step_scipy(self, iterate, rho, dt, timer):
         problem = self.problem
 
         x = iterate.x
@@ -171,6 +210,33 @@ def callback(x):
 
         return result.x
 
+    def solve_step_ipopt(self, iterate, rho, dt, timer):
+        lamb = 1.0 / dt
+        reduced_problem = BoxReducedProblem(self, iterate, lamb, rho)
+        return reduced_problem.solve(timer=timer)
+
+    def solve_step_box(self, iterate, rho, dt, timer):
+        from .box_solver import BoxSolverError, solve_box_constrained
+
+        problem = self.problem
+        lamb = 1.0 / dt
+
+        def objective(x):
+            return self.objective(iterate, x, lamb, rho)
+
+        def gradient(x):
+            return self.gradient(iterate, x, lamb, rho)
+
+        def hessian(x):
+            return self.hessian(iterate, x, lamb, rho)
+
+        try:
+            return solve_box_constrained(
+                iterate.x, objective, gradient, hessian, problem.var_lb, problem.var_ub
+            )
+        except BoxSolverError as e:
+            raise StepSolverError("Box-constrained solver failed to converge") from e
+
     def step(
         self, iterate, rho: float, dt: float, next_steps, display: bool, timer
     ) -> StepControlResult:
@@ -180,13 +246,13 @@ def step(
         problem = self.problem
         params = self.params
 
-        # reduced_problem = BoxReducedProblem(self, iterate, lamb, rho)
-        # x = reduced_problem.solve(timer=timer)
+        # x = self.solve_step_ipopt(iterate, rho, dt, timer)
 
-        # TODO: The minimize function shipped with scipy
+        # Note: The minimize function shipped with scipy
         # do not consistently produce high-quality solutions,
         # causing the optimization of the overall problem to fail.
-        x = self.solve_step(iterate, rho, dt, timer=timer)
+        # x = self.solve_step_scipy(iterate, rho, dt, timer)
+        x = self.solve_step_box(iterate, rho, dt, timer)
 
         cons = problem.cons(x)
         w = (-1 / lamb) * cons

diff --git a/pygradflow/step/box_solver.py b/pygradflow/step/box_solver.py
@@ -0,0 +1,130 @@
+import numpy as np
+import scipy as sp
+
+
+# Simple dense BFGS implementation
+# Note: We should use a sparse limited-memory variant
+# storing the approximate inverse Hessian
+class DampedBFGS:
+    def __init__(self, n):
+        self.mat = np.eye(n)
+
+    def update(self, s, y):
+        assert np.linalg.norm(s) > 0
+
+        s_prod = np.dot(self.mat, s)
+
+        prod = np.dot(s, y)
+        bidir_prod = np.dot(s, s_prod)
+
+        assert bidir_prod >= 0.0
+
+        if prod >= 0.2 * bidir_prod:
+            theta = 1
+        else:
+            theta = 0.8 * bidir_prod / (bidir_prod - prod)
+
+        r = theta * y + (1 - theta) * s_prod
+
+        assert np.dot(r, s) > 0
+
+        self.mat -= np.outer(s_prod, s_prod) / bidir_prod
+        self.mat += np.outer(r, r) / np.dot(r, s)
+
+
+class BoxSolverError(Exception):
+    pass
+
+
+# Based on "Projected Newton Methods for Optimization Problems with Simple Constraints"
+def solve_box_constrained(x0, func, grad, hess, lb, ub, max_it=1000, use_bfgs=False):
+
+    (n,) = x0.shape
+    assert lb.shape == (n,)
+    assert ub.shape == (n,)
+
+    curr_x = np.clip(x0, lb, ub)
+
+    beta = 0.5
+    sigma = 1e-3
+
+    if use_bfgs:
+        bfgs = DampedBFGS(n)
+
+    prev_x = None
+    prev_grad = None
+
+    for iteration in range(max_it):
+        curr_func = func(curr_x)
+        curr_grad = grad(curr_x)
+
+        if prev_x is not None and use_bfgs:
+            s = curr_x - prev_x
+            y = curr_grad - prev_grad
+
+            bfgs.update(s, y)
+
+        assert curr_grad.shape == (n,)
+
+        at_lower = np.isclose(curr_x, lb)
+        active_lower = np.logical_and(at_lower, curr_grad > 0)
+
+        at_upper = np.isclose(curr_x, ub)
+        active_upper = np.logical_and(at_upper, curr_grad < 0)
+
+        residuum = -curr_grad
+        residuum[at_lower] = np.maximum(residuum[at_lower], 0)
+        residuum[at_upper] = np.minimum(residuum[at_upper], 0)
+
+        if np.linalg.norm(residuum, ord=np.inf) < 1e-8:
+            print(f"Converged after {iteration} iterations")
+            break
+
+        active = np.logical_or(active_lower, active_upper)
+        inactive = np.logical_not(active)
+
+        dir = np.zeros((n,))
+        inactive_grad = curr_grad[inactive]
+
+        if use_bfgs:
+            curr_hess = bfgs.mat
+            inactive_grad = curr_grad[inactive]
+            inactive_hess = curr_hess[inactive, :][:, inactive]
+            dir[inactive] = np.linalg.solve(inactive_hess, -inactive_grad)
+        else:
+            curr_hess = hess(curr_x)
+            assert curr_hess.shape == (n, n)
+            inactive_hess = curr_hess.tocsr()[inactive, :][:, inactive]
+            dir[inactive] = sp.sparse.linalg.spsolve(inactive_hess, -inactive_grad)
+
+        if np.dot(dir, curr_grad) >= 0:
+            raise BoxSolverError("Hessian not positive definite")
+
+        alpha = 1.0
+
+        for i in range(20):
+            next_x = np.clip(curr_x + alpha * dir, lb, ub)
+            next_func = func(next_x)
+
+            rhs = alpha * np.dot(curr_grad[inactive], dir[inactive])
+
+            rhs += np.dot(curr_grad[active], curr_x[active] - next_x[active])
+
+            func_diff = curr_func - next_func
+
+            if func_diff >= sigma * rhs:
+                break
+
+            alpha *= beta
+        else:
+            raise Exception("Line search failed")
+
+        prev_grad = curr_grad
+        prev_x = curr_x
+
+        curr_x = next_x
+
+    else:
+        raise BoxSolverError(f"Did not converge after {max_it} iterations")
+
+    return curr_x
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pygradflow"
-version = "0.4.13"
+version = "0.4.14"
 description = "PyGradFlow is a simple implementation of the sequential homotopy method to be used to solve general nonlinear programs."
 authors = ["Christoph Hansknecht <[email protected]>"]
 readme = "README.md"