From eb5641e409a5f643a0c3823bcd82999738bceb9c Mon Sep 17 00:00:00 2001 From: Matthias Als Date: Tue, 18 Jul 2023 13:45:30 +0200 Subject: [PATCH] Improvements to the simulated annealing algorithm (#23) --- anti_clustering/_base.py | 1 - .../simulated_annealing_heuristic.py | 70 ++++++++++++------- examples/evaluation.py | 2 +- 3 files changed, 44 insertions(+), 29 deletions(-) diff --git a/anti_clustering/_base.py b/anti_clustering/_base.py index fa3c57d..4c56613 100644 --- a/anti_clustering/_base.py +++ b/anti_clustering/_base.py @@ -37,7 +37,6 @@ def run( num_groups: int, destination_column: str, ) -> pd.DataFrame: - # pylint: disable = R0913 """ Run anti clustering algorithm on dataset. :param df: The dataset to run anti-clustering on. diff --git a/anti_clustering/simulated_annealing_heuristic.py b/anti_clustering/simulated_annealing_heuristic.py index a880823..c223f92 100644 --- a/anti_clustering/simulated_annealing_heuristic.py +++ b/anti_clustering/simulated_annealing_heuristic.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -A simulated annealing approach to solving the anti-clustering problem. +A simulated annealing with restarts approach to solving the anti-clustering problem. """ import math @@ -23,7 +23,7 @@ class SimulatedAnnealingHeuristicAntiClustering(ClusterSwapHeuristic): """ - A simulated annealing approach to solving the anti-clustering problem. + A simulated annealing with restarts approach to solving the anti-clustering problem. """ def __init__( @@ -32,13 +32,15 @@ def __init__( random_seed: int = None, alpha: float = 0.9, iterations: int = 2000, - starting_temperature: float = 10, + starting_temperature: float = 100, + restarts: int = 9, ): # pylint: disable = R0913 super().__init__(verbose=verbose, random_seed=random_seed) self.alpha = alpha self.iterations = iterations self.starting_temperature = starting_temperature + self.restarts = restarts def _solve(self, distance_matrix: npt.NDArray[float], num_groups: int) -> npt.NDArray[bool]: # Start with random cluster assignment @@ -47,34 +49,48 @@ def _solve(self, distance_matrix: npt.NDArray[float], num_groups: int) -> npt.ND if self.verbose: print("Solving") - temperature = self.starting_temperature - # Initial objective value - objective = self._calculate_objective(cluster_assignment, distance_matrix) - for iteration in range(self.iterations): - if self.verbose and iteration % 5 == 0: - print(f"Iteration {iteration + 1} of {self.iterations}") + candidate_solutions = [] - # Select random element - i = self.rnd.randint(0, len(distance_matrix) - 1) - # Get possible swaps - possible_exchanges = self._get_exchanges(cluster_assignment, i) - if len(possible_exchanges) == 0: - continue - # Select random possible swap. - j = possible_exchanges[self.rnd.randint(0, len(possible_exchanges) - 1)] + for restart in range(self.restarts): + temperature = self.starting_temperature + # Initial objective value + objective = self._calculate_objective(cluster_assignment, distance_matrix) + for iteration in range(self.iterations): + if self.verbose and iteration % 5 == 0: + print(f"Iteration {iteration + 1} of {self.iterations}") - new_cluster_assignment = self._swap(cluster_assignment, i, j) - new_objective = self._calculate_objective(new_cluster_assignment, distance_matrix) + # Select random element + i = self.rnd.randint(0, len(distance_matrix) - 1) + # Get possible swaps + possible_exchanges = self._get_exchanges(cluster_assignment, i) + if len(possible_exchanges) == 0: + continue + # Select random possible swap. + j = possible_exchanges[self.rnd.randint(0, len(possible_exchanges) - 1)] - # Select solution as current if accepted - if self._accept(new_objective - objective, temperature): - objective = new_objective - cluster_assignment = new_cluster_assignment + new_cluster_assignment = self._swap(cluster_assignment, i, j) + new_objective = self._calculate_objective(new_cluster_assignment, distance_matrix) - # Cool down temperature - temperature = temperature * self.alpha + # Select solution as current if accepted + if self._accept(new_objective - objective, temperature): + objective = new_objective + cluster_assignment = new_cluster_assignment - return cluster_assignment + # Cool down temperature + temperature = temperature * self.alpha + + candidate_solutions.append((objective, cluster_assignment)) + + if self.verbose: + print(f"Restart {restart + 1} of {self.restarts}") + + # Cold restart, select random cluster assignment + cluster_assignment = self._get_random_clusters(num_groups=num_groups, num_elements=len(distance_matrix)) + + # Select best solution, maximizing objective + _, best_cluster_assignment = max(candidate_solutions, key=lambda x: x[0]) + + return best_cluster_assignment def _calculate_objective(self, cluster_assignment: npt.NDArray[bool], distance_matrix: npt.NDArray[float]) -> float: """ @@ -87,7 +103,7 @@ def _calculate_objective(self, cluster_assignment: npt.NDArray[bool], distance_m def _accept(self, delta: float, temperature: float) -> bool: """ - Simulated annealing acceptance function. Notice d/t is negated because this is a maximisation problem. + Simulated annealing acceptance function. Notice d/t is used instead of -d/t because we are maximizing. :param delta: Difference in objective :param temperature: Current temperature :return: Whether the solution is accepted or not. diff --git a/examples/evaluation.py b/examples/evaluation.py index 744c167..4d86d9b 100644 --- a/examples/evaluation.py +++ b/examples/evaluation.py @@ -37,7 +37,7 @@ methods: List[AntiClustering] = [ ExchangeHeuristicAntiClustering(), - SimulatedAnnealingHeuristicAntiClustering(alpha=0.95, iterations=10000, starting_temperature=10000), + SimulatedAnnealingHeuristicAntiClustering(alpha=0.95, iterations=5000, starting_temperature=1000, restarts=15), NaiveRandomHeuristicAntiClustering(), ExactClusterEditingAntiClustering(), ]