diff --git a/pygradflow/step/box_control.py b/pygradflow/step/box_control.py
index ffe5a24..f949ac9 100644
--- a/pygradflow/step/box_control.py
+++ b/pygradflow/step/box_control.py
@@ -1,5 +1,3 @@
-import time
-
 import cyipopt
 import numpy as np
 import scipy as sp
@@ -13,11 +11,6 @@
     StepSolverError,
 )
 
-max_num_it = 10000
-max_num_linesearch_it = 40
-t_init = 1.0
-beta = 0.5
-
 
 class BoxReducedProblem(cyipopt.Problem):
     """
@@ -71,7 +64,7 @@ def hessianstructure(self):
         rows = hess.row
         cols = hess.col
 
-        hess_filter = (rows >= cols)
+        hess_filter = rows >= cols
 
         return rows[hess_filter], cols[hess_filter]
 
@@ -83,7 +76,7 @@ def hessian(self, x, lag, obj_factor):
         cols = hess.col
         data = hess.data
 
-        hess_filter = (rows >= cols)
+        hess_filter = rows >= cols
 
         return data[hess_filter]
 
@@ -108,8 +101,6 @@ def x0(self):
         return self.iterate.x
 
     def solve(self, timer):
-        iterate = self.iterate
-
         self.set_options(timer)
 
         x0 = self.x0
@@ -165,7 +156,7 @@ def hessian(self, iterate, x, lamb, rho):
         cons = eval.cons(x)
         jac = eval.cons_jac(x)
 
-        cons_factor = (1 / lamb + rho)
+        cons_factor = 1 / lamb + rho
         y = cons_factor * cons + yhat
 
         hess = eval.lag_hess(x, y)
@@ -224,6 +215,28 @@ def solve_step_ipopt(self, iterate, rho, dt, timer):
         reduced_problem = BoxReducedProblem(self, iterate, lamb, rho)
         return reduced_problem.solve(timer=timer)
 
+    def solve_step_box(self, iterate, rho, dt, timer):
+        from .box_solver import BoxSolverError, solve_box_constrained
+
+        problem = self.problem
+        lamb = 1.0 / dt
+
+        def objective(x):
+            return self.objective(iterate, x, lamb, rho)
+
+        def gradient(x):
+            return self.gradient(iterate, x, lamb, rho)
+
+        def hessian(x):
+            return self.hessian(iterate, x, lamb, rho)
+
+        try:
+            return solve_box_constrained(
+                iterate.x, objective, gradient, hessian, problem.var_lb, problem.var_ub
+            )
+        except BoxSolverError as e:
+            raise StepSolverError("Box-constrained solver failed to converge") from e
+
     def step(
         self, iterate, rho: float, dt: float, next_steps, display: bool, timer
     ) -> StepControlResult:
@@ -238,8 +251,8 @@ def step(
         # Note: The minimize function shipped with scipy
         # do not consistently produce high-quality solutions,
         # causing the optimization of the overall problem to fail.
-
-        x = self.solve_step_scipy(iterate, rho, dt, timer)
+        # x = self.solve_step_scipy(iterate, rho, dt, timer)
+        x = self.solve_step_box(iterate, rho, dt, timer)
 
         cons = problem.cons(x)
         w = (-1 / lamb) * cons
diff --git a/pygradflow/step/box_solver.py b/pygradflow/step/box_solver.py
new file mode 100644
index 0000000..ef07e45
--- /dev/null
+++ b/pygradflow/step/box_solver.py
@@ -0,0 +1,130 @@
+import numpy as np
+import scipy as sp
+
+
+# Simple dense BFGS implementation
+# Note: We should use a sparse limited-memory variant
+# storing the approximate inverse Hessian
+class DampedBFGS:
+    def __init__(self, n):
+        self.mat = np.eye(n)
+
+    def update(self, s, y):
+        assert np.linalg.norm(s) > 0
+
+        s_prod = np.dot(self.mat, s)
+
+        prod = np.dot(s, y)
+        bidir_prod = np.dot(s, s_prod)
+
+        assert bidir_prod >= 0.0
+
+        if prod >= 0.2 * bidir_prod:
+            theta = 1
+        else:
+            theta = 0.8 * bidir_prod / (bidir_prod - prod)
+
+        r = theta * y + (1 - theta) * s_prod
+
+        assert np.dot(r, s) > 0
+
+        self.mat -= np.outer(s_prod, s_prod) / bidir_prod
+        self.mat += np.outer(r, r) / np.dot(r, s)
+
+
+class BoxSolverError(Exception):
+    pass
+
+
+# Based on "Projected Newton Methods for Optimization Problems with Simple Constraints"
+def solve_box_constrained(x0, func, grad, hess, lb, ub, max_it=1000, use_bfgs=False):
+
+    (n,) = x0.shape
+    assert lb.shape == (n,)
+    assert ub.shape == (n,)
+
+    curr_x = np.clip(x0, lb, ub)
+
+    beta = 0.5
+    sigma = 1e-3
+
+    if use_bfgs:
+        bfgs = DampedBFGS(n)
+
+    prev_x = None
+    prev_grad = None
+
+    for iteration in range(max_it):
+        curr_func = func(curr_x)
+        curr_grad = grad(curr_x)
+
+        if prev_x is not None and use_bfgs:
+            s = curr_x - prev_x
+            y = curr_grad - prev_grad
+
+            bfgs.update(s, y)
+
+        assert curr_grad.shape == (n,)
+
+        at_lower = np.isclose(curr_x, lb)
+        active_lower = np.logical_and(at_lower, curr_grad > 0)
+
+        at_upper = np.isclose(curr_x, ub)
+        active_upper = np.logical_and(at_upper, curr_grad < 0)
+
+        residuum = -curr_grad
+        residuum[at_lower] = np.maximum(residuum[at_lower], 0)
+        residuum[at_upper] = np.minimum(residuum[at_upper], 0)
+
+        if np.linalg.norm(residuum, ord=np.inf) < 1e-8:
+            print(f"Converged after {iteration} iterations")
+            break
+
+        active = np.logical_or(active_lower, active_upper)
+        inactive = np.logical_not(active)
+
+        dir = np.zeros((n,))
+        inactive_grad = curr_grad[inactive]
+
+        if use_bfgs:
+            curr_hess = bfgs.mat
+            inactive_grad = curr_grad[inactive]
+            inactive_hess = curr_hess[inactive, :][:, inactive]
+            dir[inactive] = np.linalg.solve(inactive_hess, -inactive_grad)
+        else:
+            curr_hess = hess(curr_x)
+            assert curr_hess.shape == (n, n)
+            inactive_hess = curr_hess.tocsr()[inactive, :][:, inactive]
+            dir[inactive] = sp.sparse.linalg.spsolve(inactive_hess, -inactive_grad)
+
+        if np.dot(dir, curr_grad) >= 0:
+            raise BoxSolverError("Hessian not positive definite")
+
+        alpha = 1.0
+
+        for i in range(20):
+            next_x = np.clip(curr_x + alpha * dir, lb, ub)
+            next_func = func(next_x)
+
+            rhs = alpha * np.dot(curr_grad[inactive], dir[inactive])
+
+            rhs += np.dot(curr_grad[active], curr_x[active] - next_x[active])
+
+            func_diff = curr_func - next_func
+
+            if func_diff >= sigma * rhs:
+                break
+
+            alpha *= beta
+        else:
+            raise Exception("Line search failed")
+
+        prev_grad = curr_grad
+        prev_x = curr_x
+
+        curr_x = next_x
+
+    else:
+        raise BoxSolverError(f"Did not converge after {max_it} iterations")
+
+    return curr_x
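Usage note (not part of the patch): a minimal sketch of how the new solve_box_constrained routine could be exercised on its own, outside the step controller. The quadratic objective, bounds, and starting point below are made up for illustration; only the solver itself comes from the diff above.

    import numpy as np
    import scipy as sp

    from pygradflow.step.box_solver import solve_box_constrained

    # Illustrative strictly convex quadratic f(x) = 0.5 * x' Q x - b' x
    # with simple bounds 0 <= x <= 1 (all values chosen arbitrarily).
    Q = sp.sparse.diags([2.0, 4.0, 8.0]).tocsr()
    b = np.array([1.0, -3.0, 2.0])

    def func(x):
        return 0.5 * x @ (Q @ x) - b @ x

    def grad(x):
        return Q @ x - b

    def hess(x):
        # The Newton branch calls .tocsr() on the result,
        # so any scipy.sparse matrix format works here.
        return Q

    lb = np.zeros(3)
    ub = np.ones(3)

    x_opt = solve_box_constrained(np.full(3, 0.5), func, grad, hess, lb, ub)
    # The unconstrained minimizer has a negative second component,
    # so that variable ends up clipped to its lower bound.
    print(x_opt)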