Merge pull request #98 from chrhansk/hotfix-executor-errors
Fix errors in runners
chrhansk authored Jun 7, 2024
2 parents bf8b963 + 12f0acd commit 661548b
Showing 11 changed files with 134 additions and 116 deletions.
2 changes: 1 addition & 1 deletion .readthedocs.yaml
@@ -20,7 +20,7 @@ build:
- pip install poetry

post_install:
- VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install --with docs
- VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install --with docs --without solvers

# Build documentation in the "docs/" directory with Sphinx
sphinx:
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -14,7 +14,7 @@
project = "pygradflow"
copyright = "2023, Christoph Hansknecht"
author = "Christoph Hansknecht"
release = "0.5.5"
release = "0.5.6"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
3 changes: 2 additions & 1 deletion pygradflow/result.py
@@ -40,7 +40,8 @@ def status(self) -> SolverStatus:
return self._status

def __getattr__(self, name):
return self._attrs.get(name, None)
attrs = super().__getattribute__("_attrs")
return attrs.get(name, None)

def __setitem__(self, name, value):
self._attrs[name] = value
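
Note on the __getattr__ change: Python only calls __getattr__ when normal attribute lookup fails, so looking up self._attrs from inside __getattr__ re-enters __getattr__ whenever _attrs does not exist yet (for example while an instance is being reconstructed by pickle in a worker process, before __init__ has run), which recurses without bound. Fetching _attrs through super().__getattribute__ bypasses __getattr__ and fails with a plain AttributeError instead. A minimal sketch of the pattern (not the repository's full class):

    class Result:
        def __init__(self):
            self._attrs = {}

        def __getattr__(self, name):
            # Bypass __getattr__ when fetching _attrs itself; raises
            # AttributeError (instead of recursing) if it is not set yet.
            attrs = super().__getattribute__("_attrs")
            return attrs.get(name, None)
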
4 changes: 1 addition & 3 deletions pygradflow/runners/instance.py
@@ -1,7 +1,5 @@
from abc import ABC, abstractmethod

import numpy as np

from pygradflow.solver import Solver


@@ -29,4 +27,4 @@ def x0(self):
raise NotImplementedError()

def y0(self):
return np.zeros((self.num_cons,))
return 0.0
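
The default dual starting point is now the scalar 0.0 rather than a zero vector of length num_cons; together with the widened solve() signature further below, callers can pass either a full vector or a plain number. A hedged sketch of how a scalar could be expanded inside the solver (the helper name and exact broadcast rule are assumptions, not shown in this diff):

    import numpy as np

    def as_dual_vector(y0, num_cons):
        # Scalars are broadcast to the full dual dimension;
        # vectors are validated against it.
        if np.isscalar(y0):
            return np.full((num_cons,), float(y0))
        y0 = np.asarray(y0, dtype=float)
        assert y0.shape == (num_cons,)
        return y0
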
81 changes: 53 additions & 28 deletions pygradflow/runners/runner.py
@@ -5,7 +5,7 @@
import os
from abc import ABC, abstractmethod
from concurrent.futures import FIRST_COMPLETED, ProcessPoolExecutor, wait
from multiprocessing import cpu_count
from multiprocessing import Process, Queue, cpu_count

import numpy as np

@@ -21,8 +21,6 @@
def solve_instance(instance, params, log_filename, verbose):
logger.handlers.clear()

np.seterr(divide="raise", over="raise", invalid="raise")

handler = logging.FileHandler(log_filename)
handler.setFormatter(formatter)
logger.addHandler(handler)
@@ -39,32 +37,40 @@ def solve_instance(instance, params, log_filename, verbose):
warn_logger.addHandler(handler)
warn_logger.setLevel(logging.WARN)

return instance.solve(params)
try:
np.seterr(divide="raise", over="raise", invalid="raise")
result = instance.solve(params)
return (instance, result)
except Exception as exc:
logger.error("Error solving %s", instance.name, exc_info=exc)
return (instance, "error")
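
With divide, over and invalid set to "raise", NumPy turns the corresponding floating-point warnings into FloatingPointError (a subclass of ArithmeticError), so numerical failures inside instance.solve surface as ordinary exceptions that the except clause above can log and map to an "error" result instead of crashing the worker. A small standalone illustration:

    import numpy as np

    np.seterr(divide="raise", over="raise", invalid="raise")
    try:
        np.exp(np.array([1000.0]))  # overflow now raises instead of warning
    except FloatingPointError as exc:
        print("caught:", exc)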


def solve_instance_queue(queue, instance, params, log_filename, verbose):
instance_result = solve_instance(instance, params, log_filename, verbose)
queue.put(instance_result)

def try_solve_instance(instance, params, log_filename, verbose):
try:
# No time limit
if params.time_limit == np.inf:
return (instance, solve_instance(instance, params, log_filename, verbose))

with ProcessPoolExecutor(1) as pool:
future = pool.submit(
solve_instance, instance, params, log_filename, verbose
)
done, _ = wait([future], timeout=(params.time_limit + 10))
def solve_instance_time_limit(instance, params, log_filename, verbose):
if params.time_limit == np.inf:
return solve_instance(instance, params, log_filename, verbose)

if len(done) == 0:
logger.error("Reached timeout, aborting")
return (instance, "timeout")
queue = Queue()
process = Process(
target=solve_instance_queue,
args=(queue, instance, params, log_filename, verbose),
)

result = next(iter(done)).result()
return (instance, result)
process.start()
process.join(timeout=(params.time_limit + 10.0))

except Exception as exc:
logger.error("Error solving %s", instance.name)
logger.exception(exc, exc_info=(type(exc), exc, exc.__traceback__))
return (instance, "error")
if process.is_alive():
logger.error("Reached timeout, terminating process for %s", instance.name)
process.terminate()
logger.error("Terminated process for %s", instance.name)
return (instance, "timeout")
else:
return queue.get()
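
The timeout handling no longer relies on a nested ProcessPoolExecutor: a pool worker that exceeds its limit cannot be killed through the futures API (cancel() only affects tasks that have not started), whereas a raw multiprocessing.Process can be terminated outright. The generic shape of the pattern, sketched independently of the runner with illustrative names:

    from multiprocessing import Process, Queue

    def _worker(queue, fn, args):
        queue.put(fn(*args))

    def run_with_timeout(fn, args, limit):
        queue = Queue()
        proc = Process(target=_worker, args=(queue, fn, args))
        proc.start()
        proc.join(timeout=limit)
        if proc.is_alive():
            # Still running after the deadline: kill it and report a timeout.
            proc.terminate()
            proc.join()
            return "timeout"
        return queue.get()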


class Runner(ABC):
@@ -93,7 +99,7 @@ def solve_instances_sequential(self, instances, args, params):
verbose = args.verbose
for instance in instances:
log_filename = self.log_filename(args, instance)
yield try_solve_instance(instance, params, log_filename, verbose)
yield solve_instance_time_limit(instance, params, log_filename, verbose)

def solve_instances_parallel(self, instances, args, params):
verbose = args.verbose
@@ -114,16 +120,35 @@ def solve_instances_parallel(self, instances, args, params):
solve_args = zip(instances, all_params, all_log_filenames, all_verbose)

with ProcessPoolExecutor(num_procs, max_tasks_per_child=1) as pool:
futures = [
pool.submit(try_solve_instance, *solve_arg) for solve_arg in solve_args
]

while len(futures) != 0:
futures = []

for solve_arg in solve_args:
future = pool.submit(solve_instance_time_limit, *solve_arg)
future.instance = solve_arg[0]
futures.append(future)

run_logger.info("Started %d instances", len(futures))

while True:
done, futures = wait(futures, return_when=FIRST_COMPLETED)

for item in done:
yield item.result()

if len(futures) == 0:
run_logger.info("Solved all instances, terminating")
break

remaining_future = next(iter(futures))

run_logger.info(
"Finished %d instances, waiting for %d (including %s)",
len(done),
len(futures),
remaining_future.instance.name,
)
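
The parallel driver now submits every job up front, tags each Future with its instance (a plain attribute assignment on the Future object), and drains results as they finish via wait(..., return_when=FIRST_COMPLETED), logging what is still outstanding. A stripped-down sketch of the same loop with generic names:

    from concurrent.futures import FIRST_COMPLETED, ProcessPoolExecutor, wait

    def run_all(fn, items, num_procs=4):
        with ProcessPoolExecutor(num_procs) as pool:
            futures = []
            for item in items:
                future = pool.submit(fn, item)
                future.item = item  # remember which job this future belongs to
                futures.append(future)

            while futures:
                done, futures = wait(futures, return_when=FIRST_COMPLETED)
                for finished in done:
                    yield finished.item, finished.result()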

def solve_instances(self, instances, args):
# yields sequence of tuples of (instance, result) for each instance
run_logger.info("Solving %d instances", len(instances))
8 changes: 4 additions & 4 deletions pygradflow/solver.py
@@ -193,17 +193,17 @@ def _check_terminate(self, iterate, iteration, timer):

def solve(
self,
x0: Optional[np.ndarray] = None,
y0: Optional[np.ndarray] = None,
x0: np.ndarray | float | None = None,
y0: np.ndarray | float | None = None,
) -> SolverResult:
"""
Solves the problem starting from the given primal / dual point
Parameters
----------
x0: np.ndarray
x0: np.ndarray | float | None
The initial primal point :math:`x_0 \\in \\mathbb{R}^{n}`
y0: np.ndarray
y0: np.ndarray | float | None
The initial dual point :math:`y_0 \\in \\mathbb{R}^{m}`
Returns
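
With the widened signature, solve() accepts either full vectors or plain scalars for the starting points. A short usage sketch (the problem object and its variable count n are assumptions, set up as elsewhere in pygradflow):

    import numpy as np
    from pygradflow.solver import Solver

    # `problem`: a pygradflow Problem with n primal variables (defined elsewhere)
    solver = Solver(problem)
    result = solver.solve(x0=np.zeros(n), y0=0.0)  # scalar dual start
    print(result.status)
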
3 changes: 2 additions & 1 deletion pygradflow/step/box_control.py
@@ -254,7 +254,8 @@ def step(
# x = self.solve_step_scipy(iterate, rho, dt, timer)
x = self.solve_step_box(iterate, rho, dt, timer)

cons = problem.cons(x)
eval = iterate.eval
cons = eval.cons(x)
w = (-1 / lamb) * cons
y = iterate.y - w

103 changes: 41 additions & 62 deletions pygradflow/step/box_solver.py
@@ -1,43 +1,27 @@
from typing import Callable

import numpy as np
import scipy as sp


# Simple dense BFGS implementation
# Note: We should use a sparse limited-memory variant
# storing the approximate inverse Hessian
class DampedBFGS:
def __init__(self, n):
self.mat = np.eye(n)

def update(self, s, y):
assert np.linalg.norm(s) > 0

s_prod = np.dot(self.mat, s)

prod = np.dot(s, y)
bidir_prod = np.dot(s, s_prod)

assert bidir_prod >= 0.0

if prod >= 0.2 * bidir_prod:
theta = 1
else:
theta = 0.8 * bidir_prod / (bidir_prod - prod)

r = theta * y + (1 - theta) * s_prod

assert np.dot(r, s) > 0

self.mat -= np.outer(s_prod, s_prod) / bidir_prod
self.mat += np.outer(r, r) / np.dot(r, s)
from pygradflow.eval import EvalError


class BoxSolverError(Exception):
pass


# Based on "Projected Newton Methods for Optimization Problems with Simple Constraints"
def solve_box_constrained(x0, func, grad, hess, lb, ub, max_it=1000, use_bfgs=False):
def solve_box_constrained(
x0: np.ndarray,
func: Callable[[np.ndarray], float],
grad: Callable[[np.ndarray], np.ndarray],
hess: Callable[[np.ndarray], sp.sparse.spmatrix],
lb: np.ndarray,
ub: np.ndarray,
max_it=1000,
atol: float = 1e-8,
rtol: float = 1e-8,
):

(n,) = x0.shape
assert lb.shape == (n,)
@@ -48,22 +32,10 @@ def solve_box_constrained(x0, func, grad, hess, lb, ub, max_it=1000, use_bfgs=Fa
beta = 0.5
sigma = 1e-3

if use_bfgs:
bfgs = DampedBFGS(n)

prev_x = None
prev_grad = None

for iteration in range(max_it):
curr_func = func(curr_x)
curr_grad = grad(curr_x)

if prev_x is not None and use_bfgs:
s = curr_x - prev_x
y = curr_grad - prev_grad

bfgs.update(s, y)

assert curr_grad.shape == (n,)

at_lower = np.isclose(curr_x, lb)
@@ -72,11 +44,17 @@ def solve_box_constrained(x0, func, grad, hess, lb, ub, max_it=1000, use_bfgs=Fa
at_upper = np.isclose(curr_x, ub)
active_upper = np.logical_and(at_upper, curr_grad < 0)

residuum = -curr_grad
residuum[at_lower] = np.maximum(residuum[at_lower], 0)
residuum[at_upper] = np.minimum(residuum[at_upper], 0)
residuals = -curr_grad
residuals[at_lower] = np.maximum(residuals[at_lower], 0)
residuals[at_upper] = np.minimum(residuals[at_upper], 0)

residuum = np.linalg.norm(residuals, ord=np.inf)
grad_norm = np.linalg.norm(curr_grad, ord=np.inf)

if np.linalg.norm(residuum, ord=np.inf) < 1e-8:
if grad_norm < atol:
break

if (residuum < atol) or (residuum / grad_norm) < rtol:
break

active = np.logical_or(active_lower, active_upper)
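
The hard-coded 1e-8 check is replaced by an absolute/relative test on the projected gradient: components of -grad that would push the iterate out of the box at active bounds are clipped away, and the iteration stops when the remaining residual is small either absolutely (atol) or relative to the plain gradient norm (rtol). Restated as a standalone helper:

    import numpy as np

    def converged(curr_x, curr_grad, lb, ub, atol=1e-8, rtol=1e-8):
        at_lower = np.isclose(curr_x, lb)
        at_upper = np.isclose(curr_x, ub)

        residuals = -curr_grad  # fresh array, curr_grad is not modified
        residuals[at_lower] = np.maximum(residuals[at_lower], 0)
        residuals[at_upper] = np.minimum(residuals[at_upper], 0)

        residuum = np.linalg.norm(residuals, ord=np.inf)
        grad_norm = np.linalg.norm(curr_grad, ord=np.inf)

        if grad_norm < atol:
            return True
        return (residuum < atol) or (residuum / grad_norm) < rtol
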
@@ -85,25 +63,27 @@ def solve_box_constrained(x0, func, grad, hess, lb, ub, max_it=1000, use_bfgs=Fa
dir = np.zeros((n,))
inactive_grad = curr_grad[inactive]

if use_bfgs:
curr_hess = bfgs.mat
inactive_grad = curr_grad[inactive]
inactive_hess = curr_hess[inactive, :][:, inactive]
dir[inactive] = np.linalg.solve(inactive_hess, -inactive_grad)
else:
curr_hess = hess(curr_x)
assert curr_hess.shape == (n, n)
inactive_hess = curr_hess.tocsr()[inactive, :][:, inactive]
dir[inactive] = sp.sparse.linalg.spsolve(inactive_hess, -inactive_grad)
curr_hess = hess(curr_x)
assert curr_hess.shape == (n, n)
inactive_hess = curr_hess.tocsr()[inactive, :][:, inactive]
dir[inactive] = sp.sparse.linalg.spsolve(inactive_hess, -inactive_grad)

if np.dot(dir, curr_grad) >= 0:
raise BoxSolverError("Hessian not positive definite")
raise BoxSolverError("Inactive Hessian not positive definite")

alpha = 1.0

eval_errors = False

for i in range(20):
next_x = np.clip(curr_x + alpha * dir, lb, ub)
next_func = func(next_x)

try:
next_func = func(next_x)
except (ArithmeticError, EvalError):
eval_errors = True
alpha *= beta
continue

rhs = alpha * np.dot(curr_grad[inactive], dir[inactive])

@@ -116,10 +96,9 @@ def solve_box_constrained(x0, func, grad, hess, lb, ub, max_it=1000, use_bfgs=Fa

alpha *= beta
else:
raise Exception("Line search failed")

prev_grad = curr_grad
prev_x = curr_x
if eval_errors:
raise BoxSolverError("Line search failed")
raise Exception("Line search did not converge")

curr_x = next_x
